Merge branch 'develop' into collected-small-fixes
This commit is contained in:
@ -49,8 +49,8 @@ if(DOWNLOAD_KOKKOS)
|
||||
list(APPEND KOKKOS_LIB_BUILD_ARGS "-DCMAKE_CXX_EXTENSIONS=${CMAKE_CXX_EXTENSIONS}")
|
||||
list(APPEND KOKKOS_LIB_BUILD_ARGS "-DCMAKE_TOOLCHAIN_FILE=${CMAKE_TOOLCHAIN_FILE}")
|
||||
include(ExternalProject)
|
||||
set(KOKKOS_URL "https://github.com/kokkos/kokkos/archive/3.7.01.tar.gz" CACHE STRING "URL for KOKKOS tarball")
|
||||
set(KOKKOS_MD5 "f140e02b826223b1045207d9bc10d404" CACHE STRING "MD5 checksum of KOKKOS tarball")
|
||||
set(KOKKOS_URL "https://github.com/kokkos/kokkos/archive/3.7.02.tar.gz" CACHE STRING "URL for KOKKOS tarball")
|
||||
set(KOKKOS_MD5 "34d7860d548c06a4040236d959c9f99a" CACHE STRING "MD5 checksum of KOKKOS tarball")
|
||||
mark_as_advanced(KOKKOS_URL)
|
||||
mark_as_advanced(KOKKOS_MD5)
|
||||
GetFallbackURL(KOKKOS_URL KOKKOS_FALLBACK)
|
||||
@ -75,7 +75,7 @@ if(DOWNLOAD_KOKKOS)
|
||||
add_dependencies(LAMMPS::KOKKOSCORE kokkos_build)
|
||||
add_dependencies(LAMMPS::KOKKOSCONTAINERS kokkos_build)
|
||||
elseif(EXTERNAL_KOKKOS)
|
||||
find_package(Kokkos 3.7.01 REQUIRED CONFIG)
|
||||
find_package(Kokkos 3.7.02 REQUIRED CONFIG)
|
||||
target_link_libraries(lammps PRIVATE Kokkos::kokkos)
|
||||
else()
|
||||
set(LAMMPS_LIB_KOKKOS_SRC_DIR ${LAMMPS_LIB_SOURCE_DIR}/kokkos)
|
||||
|
||||
@ -19,7 +19,7 @@ Syntax
|
||||
* ex,ey,ez = E-field component values (electric field units)
|
||||
* any of ex,ey,ez can be a variable (see below)
|
||||
* zero or more keyword/value pairs may be appended to args
|
||||
* keyword = *region* or *energy*
|
||||
* keyword = *region* or *energy* or *potential*
|
||||
|
||||
.. parsed-literal::
|
||||
|
||||
@ -27,6 +27,8 @@ Syntax
|
||||
region-ID = ID of region atoms must be in to have added force
|
||||
*energy* value = v_name
|
||||
v_name = variable with name that calculates the potential energy of each atom in the added E-field
|
||||
*potential* value = v_name
|
||||
v_name = variable with name that calculates the electric potential of each atom in the added E-field
|
||||
|
||||
Examples
|
||||
""""""""
|
||||
@ -112,7 +114,8 @@ one or more variables, and if you are performing dynamics via the
|
||||
:doc:`run <run>` command. If the keyword is not used, LAMMPS will set
|
||||
the energy to 0.0, which is typically fine for dynamics.
|
||||
|
||||
The *energy* keyword is required if the added force is defined with
|
||||
The *energy* keyword (or *potential* keyword, described below)
|
||||
is required if the added force is defined with
|
||||
one or more variables, and you are performing energy minimization via
|
||||
the "minimize" command for charged particles. It is not required for
|
||||
point-dipoles, but a warning is issued since the minimizer in LAMMPS
|
||||
@ -122,7 +125,7 @@ minimize the orientation of dipoles in an applied electric field.
|
||||
The *energy* keyword specifies the name of an atom-style
|
||||
:doc:`variable <variable>` which is used to compute the energy of each
|
||||
atom as a function of its position. Like variables used for *ex*,
|
||||
*ey*, *ez*, the energy variable is specified as v_name, where name
|
||||
*ey*, *ez*, the energy variable is specified as "v_name", where "name"
|
||||
is the variable name.
|
||||
|
||||
Note that when the *energy* keyword is used during an energy
|
||||
@ -133,6 +136,27 @@ due to the electric field were a spring-like F = kx, then the energy
|
||||
formula should be E = -0.5kx\^2. If you don't do this correctly, the
|
||||
minimization will not converge properly.
|
||||
|
||||
.. versionadded:: TBD
|
||||
|
||||
The *potential* keyword can be used as an alternative to the *energy* keyword
|
||||
to specify the name of an atom-style variable, which is used to compute the
|
||||
added electric potential to each atom as a function of its position. The
|
||||
variable should have units of electric field multiplied by distance (that is,
|
||||
in `units real`, the potential should be in volts). As with the *energy*
|
||||
keyword, the variable name is specified as "v_name". The energy added by this
|
||||
fix is then calculated as the electric potential multiplied by charge.
|
||||
|
||||
The *potential* keyword is mainly intended for correct charge
|
||||
equilibration in simulations with :doc:`fix qeq/reaxff<fix_qeq_reaxff>`,
|
||||
since with variable charges the electric potential can be known
|
||||
beforehand but the energy cannot. A small additional benefit is that
|
||||
the *energy* keyword requires an additional conversion to energy units
|
||||
which the *potential* keyword avoids. Thus, when the *potential*
|
||||
keyword is specified, the *energy* keyword must not be used. As with
|
||||
*energy*, the *potential* keyword is not allowed if the added field is a
|
||||
constant vector. The *potential* keyword is not supported by *fix
|
||||
efield/tip4p*.
|
||||
|
||||
----------
|
||||
|
||||
Restart, fix_modify, output, run start/stop, minimize info
|
||||
|
||||
@ -128,9 +128,12 @@ periodic cell dimensions less than 10 Angstroms.
|
||||
|
||||
This fix may be used in combination with :doc:`fix efield <fix_efield>`
|
||||
and will apply the external electric field during charge equilibration,
|
||||
but there may be only one fix efield instance used, it may only use a
|
||||
constant electric field, and the electric field vector may only have
|
||||
components in non-periodic directions.
|
||||
but there may be only one fix efield instance used and the electric field
|
||||
vector may only have components in non-periodic directions. Equal-style
|
||||
variables can be used for electric field vector components without any further
|
||||
settings. Atom-style variables can be used for spatially-varying electric field
|
||||
vector components, but the resulting electric potential must be specified
|
||||
as an atom-style variable using the *potential* keyword for `fix efield`.
|
||||
|
||||
Related commands
|
||||
""""""""""""""""
|
||||
|
||||
@ -67,7 +67,7 @@ Syntax
|
||||
bound(group,dir,region), gyration(group,region), ke(group,region),
|
||||
angmom(group,dim,region), torque(group,dim,region),
|
||||
inertia(group,dimdim,region), omega(group,dim,region)
|
||||
special functions = sum(x), min(x), max(x), ave(x), trap(x), slope(x), gmask(x), rmask(x), grmask(x,y), next(x), is_file(name), is_os(name), extract_setting(name), label2type(kind,label)
|
||||
special functions = sum(x), min(x), max(x), ave(x), trap(x), slope(x), gmask(x), rmask(x), grmask(x,y), next(x), is_file(name), is_os(name), extract_setting(name), label2type(kind,label), is_typelabel(kind,label)
|
||||
feature functions = is_available(category,feature), is_active(category,feature), is_defined(category,id)
|
||||
atom value = id[i], mass[i], type[i], mol[i], x[i], y[i], z[i], vx[i], vy[i], vz[i], fx[i], fy[i], fz[i], q[i]
|
||||
atom vector = id, mass, type, mol, radius, q, x, y, z, vx, vy, vz, fx, fy, fz
|
||||
@ -532,7 +532,7 @@ variables.
|
||||
+--------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
|
||||
| Region functions | count(ID,IDR), mass(ID,IDR), charge(ID,IDR), xcm(ID,dim,IDR), vcm(ID,dim,IDR), fcm(ID,dim,IDR), bound(ID,dir,IDR), gyration(ID,IDR), ke(ID,IDR), angmom(ID,dim,IDR), torque(ID,dim,IDR), inertia(ID,dimdim,IDR), omega(ID,dim,IDR) |
|
||||
+--------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
|
||||
| Special functions | sum(x), min(x), max(x), ave(x), trap(x), slope(x), gmask(x), rmask(x), grmask(x,y), next(x), is_file(name), is_os(name), extract_setting(name), label2type(kind,label) |
|
||||
| Special functions | sum(x), min(x), max(x), ave(x), trap(x), slope(x), gmask(x), rmask(x), grmask(x,y), next(x), is_file(name), is_os(name), extract_setting(name), label2type(kind,label), is_typelabel(kind,label) |
|
||||
+--------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
|
||||
| Feature functions | is_available(category,feature), is_active(category,feature), is_defined(category,id) |
|
||||
+--------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
|
||||
@ -996,10 +996,17 @@ via the link in this paragraph.
|
||||
|
||||
The label2type(kind,label) function converts type labels into numeric
|
||||
types, using label maps created by the :doc:`labelmap <labelmap>` or
|
||||
:doc:`read_data <read_data>` commands. The first argument is the
|
||||
label map kind (atom, bond, angle, dihedral, or improper) and the
|
||||
second argument is the label. The function returns the corresponding
|
||||
numeric type.
|
||||
:doc:`read_data <read_data>` commands. The first argument is the label
|
||||
map kind (atom, bond, angle, dihedral, or improper) and the second
|
||||
argument is the label. The function returns the corresponding numeric
|
||||
type or triggers an error if the queried label does not exist.
|
||||
|
||||
.. versionadded:: TBD
|
||||
|
||||
The is_typelabel(kind,label) function has the same arguments as
|
||||
label2type(), but returns 1 if the type label has been assigned,
|
||||
otherwise it returns 0. This function can be used to check if a
|
||||
particular type label already exists in the simulation.
|
||||
|
||||
----------
|
||||
|
||||
|
||||
@ -37,4 +37,4 @@ velocity all create 300.0 41279 loop geom
|
||||
fix 1 all nve
|
||||
fix 2 all qeq/reax 1 0.0 10.0 1e-6 reax/c
|
||||
|
||||
run 100
|
||||
run $t
|
||||
|
||||
1
examples/snap/C_SNAP_2021.10.15.quadratic.snapcoeff
Symbolic link
1
examples/snap/C_SNAP_2021.10.15.quadratic.snapcoeff
Symbolic link
@ -0,0 +1 @@
|
||||
../../potentials/C_SNAP_2021.10.15.quadratic.snapcoeff
|
||||
1
examples/snap/C_SNAP_2021.10.15.quadratic.snapparam
Symbolic link
1
examples/snap/C_SNAP_2021.10.15.quadratic.snapparam
Symbolic link
@ -0,0 +1 @@
|
||||
../../potentials/C_SNAP_2021.10.15.quadratic.snapparam
|
||||
31
examples/snap/in.C_SNAP
Normal file
31
examples/snap/in.C_SNAP
Normal file
@ -0,0 +1,31 @@
|
||||
#Carbon SNAP example: 216 atom diamond unit cell simulated NVT at ~1,000GPa and 5,000K
|
||||
|
||||
units metal
|
||||
atom_style atomic
|
||||
boundary p p p
|
||||
|
||||
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#
|
||||
# Crystal orientation and MD box creation #
|
||||
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#
|
||||
lattice diamond 2.845
|
||||
region Bbox block 0 3 0 3 0 3
|
||||
create_box 1 Bbox
|
||||
create_atoms 1 region Bbox basis 1 1
|
||||
|
||||
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#
|
||||
# Interatomic potential parameters #
|
||||
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#
|
||||
# Specify hybrid with SNAP, ZBL, and long-range Coulomb
|
||||
|
||||
pair_style hybrid/overlay zbl 0.1 0.2 snap
|
||||
pair_coeff 1 1 zbl 10 10
|
||||
pair_coeff * * snap C_SNAP_2021.10.15.quadratic.snapcoeff C_SNAP_2021.10.15.quadratic.snapparam C
|
||||
|
||||
mass * 12.01
|
||||
velocity all create 8000.0 3412461 loop geom
|
||||
|
||||
fix NVE all nve
|
||||
fix NVT all langevin 5000.0 5000.0 0.1 3216548
|
||||
thermo 50
|
||||
thermo_style custom step temp ke pe etotal press pxx pyy pzz
|
||||
run 500
|
||||
105
examples/snap/log.30May23.C_SNAP.g++.1
Normal file
105
examples/snap/log.30May23.C_SNAP.g++.1
Normal file
@ -0,0 +1,105 @@
|
||||
LAMMPS (28 Mar 2023 - Development)
|
||||
using 1 OpenMP thread(s) per MPI task
|
||||
#Carbon SNAP example: 216 atom diamond unit cell simulated NVT at ~1,000GPa and 5,000K
|
||||
|
||||
units metal
|
||||
atom_style atomic
|
||||
boundary p p p
|
||||
|
||||
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#
|
||||
# Crystal orientation and MD box creation #
|
||||
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#
|
||||
lattice diamond 2.845
|
||||
Lattice spacing in x,y,z = 2.845 2.845 2.845
|
||||
region Bbox block 0 3 0 3 0 3
|
||||
create_box 1 Bbox
|
||||
Created orthogonal box = (0 0 0) to (8.535 8.535 8.535)
|
||||
1 by 1 by 1 MPI processor grid
|
||||
create_atoms 1 region Bbox basis 1 1
|
||||
Created 216 atoms
|
||||
using lattice units in orthogonal box = (0 0 0) to (8.535 8.535 8.535)
|
||||
create_atoms CPU = 0.000 seconds
|
||||
|
||||
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#
|
||||
# Interatomic potential parameters #
|
||||
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#
|
||||
# Specify hybrid with SNAP, ZBL, and long-range C_SNAP_2021.10.15.quadratic.ulomb
|
||||
|
||||
pair_style hybrid/overlay zbl 0.1 0.2 snap
|
||||
pair_coeff 1 1 zbl 10 10
|
||||
pair_coeff * * snap C_SNAP_2021.10.15.quadratic.snapcoeff C_SNAP_2021.10.15.quadratic.snapparam C
|
||||
SNAP Element = C, Radius 0.5, Weight 1
|
||||
SNAP keyword rcutfac 2.7
|
||||
SNAP keyword twojmax 8
|
||||
SNAP keyword rfac0 0.99363
|
||||
SNAP keyword rmin0 0.0
|
||||
SNAP keyword bzeroflag 0
|
||||
SNAP keyword quadraticflag 1
|
||||
|
||||
mass * 12.01
|
||||
velocity all create 8000.0 3412461 loop geom
|
||||
|
||||
fix NVE all nve
|
||||
fix NVT all langevin 5000.0 5000.0 0.1 3216548
|
||||
thermo 50
|
||||
thermo_style custom step temp ke pe etotal press pxx pyy pzz
|
||||
run 500
|
||||
Neighbor list info ...
|
||||
update: every = 1 steps, delay = 0 steps, check = yes
|
||||
max neighbors/atom: 2000, page size: 100000
|
||||
master list distance cutoff = 4.7
|
||||
ghost atom cutoff = 4.7
|
||||
binsize = 2.35, bins = 4 4 4
|
||||
2 neighbor lists, perpetual/occasional/extra = 2 0 0
|
||||
(1) pair zbl, perpetual, half/full trim from (2)
|
||||
attributes: half, newton on, cut 2.2
|
||||
pair build: halffull/newton/trim
|
||||
stencil: none
|
||||
bin: none
|
||||
(2) pair snap, perpetual
|
||||
attributes: full, newton on
|
||||
pair build: full/bin/atomonly
|
||||
stencil: full/bin/3d
|
||||
bin: standard
|
||||
Per MPI rank memory allocation (min/avg/max) = 4.835 | 4.835 | 4.835 Mbytes
|
||||
Step Temp KinEng PotEng TotEng Press Pxx Pyy Pzz
|
||||
0 8000 222.32745 -846.66062 -624.33318 10234249 10210805 10267435 10224506
|
||||
50 4199.4229 116.70587 -723.2423 -606.53643 10256033 10157803 10280166 10330129
|
||||
100 3820.2509 106.16833 -710.43537 -604.26704 10232872 10216484 10241309 10240825
|
||||
150 4413.2948 122.64957 -710.09702 -587.44745 10254093 10323013 10265454 10173810
|
||||
200 4688.024 130.28455 -702.26198 -571.97742 10306186 10281632 10342390 10294536
|
||||
250 4997.165 138.87587 -720.58476 -581.70889 10284438 10220856 10360231 10272226
|
||||
300 4640.4911 128.96357 -710.75063 -581.78706 10263301 10264007 10290526 10235369
|
||||
350 4929.5117 136.99572 -707.2526 -570.25688 10290742 10359920 10284236 10228071
|
||||
400 4700.9354 130.64337 -697.90277 -567.2594 10250682 10277287 10246032 10228729
|
||||
450 5108.4971 141.96989 -700.57144 -558.60155 10289765 10323648 10306588 10239058
|
||||
500 5146.7039 143.03169 -700.33221 -557.30052 10334303 10349736 10358785 10294387
|
||||
Loop time of 36.7771 on 1 procs for 500 steps with 216 atoms
|
||||
|
||||
Performance: 1.175 ns/day, 20.432 hours/ns, 13.595 timesteps/s, 2.937 katom-step/s
|
||||
99.8% CPU use with 1 MPI tasks x 1 OpenMP threads
|
||||
|
||||
MPI task timing breakdown:
|
||||
Section | min time | avg time | max time |%varavg| %total
|
||||
---------------------------------------------------------------
|
||||
Pair | 36.766 | 36.766 | 36.766 | 0.0 | 99.97
|
||||
Neigh | 0.0010226 | 0.0010226 | 0.0010226 | 0.0 | 0.00
|
||||
Comm | 0.0033205 | 0.0033205 | 0.0033205 | 0.0 | 0.01
|
||||
Output | 0.00020657 | 0.00020657 | 0.00020657 | 0.0 | 0.00
|
||||
Modify | 0.0047621 | 0.0047621 | 0.0047621 | 0.0 | 0.01
|
||||
Other | | 0.001464 | | | 0.00
|
||||
|
||||
Nlocal: 216 ave 216 max 216 min
|
||||
Histogram: 1 0 0 0 0 0 0 0 0 0
|
||||
Nghost: 1746 ave 1746 max 1746 min
|
||||
Histogram: 1 0 0 0 0 0 0 0 0 0
|
||||
Neighs: 1767 ave 1767 max 1767 min
|
||||
Histogram: 1 0 0 0 0 0 0 0 0 0
|
||||
FullNghs: 32846 ave 32846 max 32846 min
|
||||
Histogram: 1 0 0 0 0 0 0 0 0 0
|
||||
|
||||
Total # of neighbors = 32846
|
||||
Ave neighs/atom = 152.06481
|
||||
Neighbor list builds = 1
|
||||
Dangerous builds = 0
|
||||
Total wall time: 0:00:36
|
||||
105
examples/snap/log.30May23.C_SNAP.g++.4
Normal file
105
examples/snap/log.30May23.C_SNAP.g++.4
Normal file
@ -0,0 +1,105 @@
|
||||
LAMMPS (28 Mar 2023 - Development)
|
||||
using 1 OpenMP thread(s) per MPI task
|
||||
#Carbon SNAP example: 216 atom diamond unit cell simulated NVT at ~1,000GPa and 5,000K
|
||||
|
||||
units metal
|
||||
atom_style atomic
|
||||
boundary p p p
|
||||
|
||||
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#
|
||||
# Crystal orientation and MD box creation #
|
||||
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#
|
||||
lattice diamond 2.845
|
||||
Lattice spacing in x,y,z = 2.845 2.845 2.845
|
||||
region Bbox block 0 3 0 3 0 3
|
||||
create_box 1 Bbox
|
||||
Created orthogonal box = (0 0 0) to (8.535 8.535 8.535)
|
||||
1 by 2 by 2 MPI processor grid
|
||||
create_atoms 1 region Bbox basis 1 1
|
||||
Created 216 atoms
|
||||
using lattice units in orthogonal box = (0 0 0) to (8.535 8.535 8.535)
|
||||
create_atoms CPU = 0.000 seconds
|
||||
|
||||
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#
|
||||
# Interatomic potential parameters #
|
||||
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#
|
||||
# Specify hybrid with SNAP, ZBL, and long-range C_SNAP_2021.10.15.quadratic.ulomb
|
||||
|
||||
pair_style hybrid/overlay zbl 0.1 0.2 snap
|
||||
pair_coeff 1 1 zbl 10 10
|
||||
pair_coeff * * snap C_SNAP_2021.10.15.quadratic.snapcoeff C_SNAP_2021.10.15.quadratic.snapparam C
|
||||
SNAP Element = C, Radius 0.5, Weight 1
|
||||
SNAP keyword rcutfac 2.7
|
||||
SNAP keyword twojmax 8
|
||||
SNAP keyword rfac0 0.99363
|
||||
SNAP keyword rmin0 0.0
|
||||
SNAP keyword bzeroflag 0
|
||||
SNAP keyword quadraticflag 1
|
||||
|
||||
mass * 12.01
|
||||
velocity all create 8000.0 3412461 loop geom
|
||||
|
||||
fix NVE all nve
|
||||
fix NVT all langevin 5000.0 5000.0 0.1 3216548
|
||||
thermo 50
|
||||
thermo_style custom step temp ke pe etotal press pxx pyy pzz
|
||||
run 500
|
||||
Neighbor list info ...
|
||||
update: every = 1 steps, delay = 0 steps, check = yes
|
||||
max neighbors/atom: 2000, page size: 100000
|
||||
master list distance cutoff = 4.7
|
||||
ghost atom cutoff = 4.7
|
||||
binsize = 2.35, bins = 4 4 4
|
||||
2 neighbor lists, perpetual/occasional/extra = 2 0 0
|
||||
(1) pair zbl, perpetual, half/full trim from (2)
|
||||
attributes: half, newton on, cut 2.2
|
||||
pair build: halffull/newton/trim
|
||||
stencil: none
|
||||
bin: none
|
||||
(2) pair snap, perpetual
|
||||
attributes: full, newton on
|
||||
pair build: full/bin/atomonly
|
||||
stencil: full/bin/3d
|
||||
bin: standard
|
||||
Per MPI rank memory allocation (min/avg/max) = 4.681 | 4.681 | 4.681 Mbytes
|
||||
Step Temp KinEng PotEng TotEng Press Pxx Pyy Pzz
|
||||
0 8000 222.32745 -846.66062 -624.33318 10234249 10210805 10267435 10224506
|
||||
50 4382.3571 121.78978 -715.70492 -593.91513 10262157 10278549 10181545 10326376
|
||||
100 4546.1549 126.34188 -713.11818 -586.77631 10261694 10257647 10333666 10193770
|
||||
150 5109.4576 141.99658 -708.87952 -566.88294 10268132 10248182 10248240 10307974
|
||||
200 4764.2181 132.40206 -712.16881 -579.76675 10329903 10238991 10379394 10371323
|
||||
250 4989.5099 138.66313 -710.39748 -571.73435 10282678 10321057 10274124 10252854
|
||||
300 4853.3102 134.87801 -699.98167 -565.10366 10343314 10204138 10430172 10395634
|
||||
350 4788.1153 133.06618 -705.14381 -572.07763 10325571 10312657 10267999 10396058
|
||||
400 5055.7813 140.50487 -707.38537 -566.8805 10323176 10357258 10310733 10301536
|
||||
450 5182.3198 144.02149 -695.11614 -551.09465 10345564 10358486 10346325 10331881
|
||||
500 5311.077 147.59977 -691.32767 -543.7279 10308823 10242668 10214102 10469700
|
||||
Loop time of 11.5932 on 4 procs for 500 steps with 216 atoms
|
||||
|
||||
Performance: 3.726 ns/day, 6.441 hours/ns, 43.129 timesteps/s, 9.316 katom-step/s
|
||||
94.9% CPU use with 4 MPI tasks x 1 OpenMP threads
|
||||
|
||||
MPI task timing breakdown:
|
||||
Section | min time | avg time | max time |%varavg| %total
|
||||
---------------------------------------------------------------
|
||||
Pair | 10.241 | 10.446 | 10.695 | 5.0 | 90.11
|
||||
Neigh | 0 | 0 | 0 | 0.0 | 0.00
|
||||
Comm | 0.87613 | 1.1187 | 1.3179 | 14.9 | 9.65
|
||||
Output | 0.0002656 | 0.0010016 | 0.0015521 | 1.5 | 0.01
|
||||
Modify | 0.0019493 | 0.0020668 | 0.0021577 | 0.2 | 0.02
|
||||
Other | | 0.02508 | | | 0.22
|
||||
|
||||
Nlocal: 54 ave 54 max 54 min
|
||||
Histogram: 4 0 0 0 0 0 0 0 0 0
|
||||
Nghost: 1082 ave 1082 max 1082 min
|
||||
Histogram: 4 0 0 0 0 0 0 0 0 0
|
||||
Neighs: 432 ave 432 max 432 min
|
||||
Histogram: 4 0 0 0 0 0 0 0 0 0
|
||||
FullNghs: 8532 ave 8532 max 8532 min
|
||||
Histogram: 4 0 0 0 0 0 0 0 0 0
|
||||
|
||||
Total # of neighbors = 34128
|
||||
Ave neighs/atom = 158
|
||||
Neighbor list builds = 0
|
||||
Dangerous builds = 0
|
||||
Total wall time: 0:00:11
|
||||
@ -1,4 +1,25 @@
|
||||
# Change Log
|
||||
# CHANGELOG
|
||||
|
||||
## [3.7.02](https://github.com/kokkos/kokkos/tree/3.7.02) (2023-05-17)
|
||||
[Full Changelog](https://github.com/kokkos/kokkos/compare/3.7.01...3.7.02)
|
||||
|
||||
### Backends and Archs Enhancements:
|
||||
#### CUDA
|
||||
- Add Hopper support and update nvcc_wrapper to work with CUDA-12 [\#5693](https://github.com/kokkos/kokkos/pull/5693)
|
||||
### General Enhancements:
|
||||
- sprintf -> snprintf [\#5787](https://github.com/kokkos/kokkos/pull/5787)
|
||||
### Build System:
|
||||
- Add error message when not using `hipcc` and when `CMAKE_CXX_STANDARD` is not set [\#5945](https://github.com/kokkos/kokkos/pull/5945)
|
||||
### Bug Fixes:
|
||||
- Fix Scratch allocation alignment issues [\#5692](https://github.com/kokkos/kokkos/pull/5692)
|
||||
- Fix Intel Classic Compiler ICE [\#5710](https://github.com/kokkos/kokkos/pull/5710)
|
||||
- Don't install std algorithm headers multiple times [\#5711](https://github.com/kokkos/kokkos/pull/5711)
|
||||
- Fix static init order issue in InitalizationSettings [\#5721](https://github.com/kokkos/kokkos/pull/5721)
|
||||
- Fix src/dst Properties in deep_copy(DynamicView,View) [\#5732](https://github.com/kokkos/kokkos/pull/5732)
|
||||
- Fix build on Fedora Rawhide [\#5782](https://github.com/kokkos/kokkos/pull/5782)
|
||||
- Finalize HIP lock arrays [\#5694](https://github.com/kokkos/kokkos/pull/5694)
|
||||
- Fix CUDA lock arrays for current Desul [\#5812](https://github.com/kokkos/kokkos/pull/5812)
|
||||
- Set the correct device/context in InterOp tests [\#5701](https://github.com/kokkos/kokkos/pull/5701)
|
||||
|
||||
## [3.7.01](https://github.com/kokkos/kokkos/tree/3.7.01) (2022-12-01)
|
||||
[Full Changelog](https://github.com/kokkos/kokkos/compare/3.7.00...3.7.01)
|
||||
|
||||
@ -129,7 +129,7 @@ ENDIF()
|
||||
|
||||
set(Kokkos_VERSION_MAJOR 3)
|
||||
set(Kokkos_VERSION_MINOR 7)
|
||||
set(Kokkos_VERSION_PATCH 01)
|
||||
set(Kokkos_VERSION_PATCH 02)
|
||||
set(Kokkos_VERSION "${Kokkos_VERSION_MAJOR}.${Kokkos_VERSION_MINOR}.${Kokkos_VERSION_PATCH}")
|
||||
math(EXPR KOKKOS_VERSION "${Kokkos_VERSION_MAJOR} * 10000 + ${Kokkos_VERSION_MINOR} * 100 + ${Kokkos_VERSION_PATCH}")
|
||||
|
||||
|
||||
@ -12,7 +12,7 @@ endif
|
||||
|
||||
KOKKOS_VERSION_MAJOR = 3
|
||||
KOKKOS_VERSION_MINOR = 7
|
||||
KOKKOS_VERSION_PATCH = 01
|
||||
KOKKOS_VERSION_PATCH = 02
|
||||
KOKKOS_VERSION = $(shell echo $(KOKKOS_VERSION_MAJOR)*10000+$(KOKKOS_VERSION_MINOR)*100+$(KOKKOS_VERSION_PATCH) | bc)
|
||||
|
||||
# Options: Cuda,HIP,SYCL,OpenMPTarget,OpenMP,Threads,Serial
|
||||
|
||||
@ -25,7 +25,7 @@ INSTALL (
|
||||
# These will get ignored for standalone CMake and a true interface library made
|
||||
KOKKOS_ADD_INTERFACE_LIBRARY(
|
||||
kokkosalgorithms
|
||||
HEADERS ${ALGO_HEADERS}
|
||||
NOINSTALLHEADERS ${ALGO_HEADERS}
|
||||
SOURCES ${ALGO_SOURCES}
|
||||
)
|
||||
KOKKOS_LIB_INCLUDE_DIRECTORIES(kokkosalgorithms
|
||||
|
||||
@ -214,6 +214,9 @@ GLOBAL_SET(KOKKOS_AMDGPU_OPTIONS)
|
||||
IF(KOKKOS_ENABLE_HIP)
|
||||
SET(AMDGPU_ARCH_FLAG "--offload-arch")
|
||||
IF(NOT KOKKOS_CXX_COMPILER_ID STREQUAL HIPCC)
|
||||
IF(KOKKOS_CXX_STANDARD STREQUAL 14 AND NOT CMAKE_CXX_STANDARD)
|
||||
message(FATAL_ERROR "Set CMAKE_CXX_STANDARD to 14")
|
||||
ENDIF()
|
||||
GLOBAL_APPEND(KOKKOS_AMDGPU_OPTIONS -x hip)
|
||||
IF(DEFINED ENV{ROCM_PATH})
|
||||
GLOBAL_APPEND(KOKKOS_AMDGPU_OPTIONS --rocm-path=$ENV{ROCM_PATH})
|
||||
|
||||
@ -534,13 +534,6 @@ FUNCTION(KOKKOS_ADD_INTERFACE_LIBRARY NAME)
|
||||
IF (KOKKOS_HAS_TRILINOS)
|
||||
TRIBITS_ADD_LIBRARY(${NAME} ${ARGN})
|
||||
ELSE()
|
||||
CMAKE_PARSE_ARGUMENTS(PARSE
|
||||
""
|
||||
""
|
||||
"HEADERS;SOURCES"
|
||||
${ARGN}
|
||||
)
|
||||
|
||||
ADD_LIBRARY(${NAME} INTERFACE)
|
||||
KOKKOS_INTERNAL_ADD_LIBRARY_INSTALL(${NAME})
|
||||
ENDIF()
|
||||
|
||||
@ -915,8 +915,8 @@ inline void deep_copy(const View<T, DP...>& dst,
|
||||
template <class T, class... DP, class... SP>
|
||||
inline void deep_copy(const Kokkos::Experimental::DynamicView<T, DP...>& dst,
|
||||
const View<T, SP...>& src) {
|
||||
using dst_type = Kokkos::Experimental::DynamicView<T, SP...>;
|
||||
using src_type = View<T, DP...>;
|
||||
using dst_type = Kokkos::Experimental::DynamicView<T, DP...>;
|
||||
using src_type = View<T, SP...>;
|
||||
|
||||
using dst_execution_space = typename ViewTraits<T, DP...>::execution_space;
|
||||
using src_memory_space = typename ViewTraits<T, SP...>::memory_space;
|
||||
|
||||
@ -240,6 +240,83 @@ struct TestDynamicView {
|
||||
ASSERT_EQ(new_result_sum, (value_type)(da_resize * (da_resize - 1) / 2));
|
||||
#endif
|
||||
} // end scope
|
||||
|
||||
// Test: Reproducer to demonstrate compile-time error of deep_copy
|
||||
// of DynamicView to/from on-host View.
|
||||
// Case 4:
|
||||
{
|
||||
using device_view_type = Kokkos::View<Scalar*, Space>;
|
||||
using host_view_type = typename Kokkos::View<Scalar*, Space>::HostMirror;
|
||||
|
||||
view_type device_dynamic_view("on-device DynamicView", 1024,
|
||||
arg_total_size);
|
||||
device_view_type device_view("on-device View", arg_total_size);
|
||||
host_view_type host_view("on-host View", arg_total_size);
|
||||
|
||||
unsigned da_size = arg_total_size / 8;
|
||||
device_dynamic_view.resize_serial(da_size);
|
||||
|
||||
// Use parallel_for to populate device_dynamic_view and verify values
|
||||
#if defined(KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA)
|
||||
Kokkos::parallel_for(
|
||||
Kokkos::RangePolicy<execution_space>(0, da_size),
|
||||
KOKKOS_LAMBDA(const int i) { device_dynamic_view(i) = Scalar(i); });
|
||||
|
||||
value_type result_sum = 0.0;
|
||||
Kokkos::parallel_reduce(
|
||||
Kokkos::RangePolicy<execution_space>(0, da_size),
|
||||
KOKKOS_LAMBDA(const int i, value_type& partial_sum) {
|
||||
partial_sum += (value_type)device_dynamic_view(i);
|
||||
},
|
||||
result_sum);
|
||||
|
||||
ASSERT_EQ(result_sum, (value_type)(da_size * (da_size - 1) / 2));
|
||||
#endif
|
||||
|
||||
// Use an on-device View as intermediate to deep_copy the
|
||||
// device_dynamic_view to host, zero out the device_dynamic_view,
|
||||
// deep_copy from host back to the device_dynamic_view and verify
|
||||
Kokkos::deep_copy(device_view, device_dynamic_view);
|
||||
Kokkos::deep_copy(host_view, device_view);
|
||||
Kokkos::deep_copy(device_view, host_view);
|
||||
#if defined(KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA)
|
||||
Kokkos::parallel_for(
|
||||
Kokkos::RangePolicy<execution_space>(0, da_size),
|
||||
KOKKOS_LAMBDA(const int i) { device_dynamic_view(i) = Scalar(0); });
|
||||
#endif
|
||||
Kokkos::deep_copy(device_dynamic_view, device_view);
|
||||
#if defined(KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA)
|
||||
value_type new_result_sum = 0.0;
|
||||
Kokkos::parallel_reduce(
|
||||
Kokkos::RangePolicy<execution_space>(0, da_size),
|
||||
KOKKOS_LAMBDA(const int i, value_type& partial_sum) {
|
||||
partial_sum += (value_type)device_dynamic_view(i);
|
||||
},
|
||||
new_result_sum);
|
||||
|
||||
ASSERT_EQ(new_result_sum, (value_type)(da_size * (da_size - 1) / 2));
|
||||
#endif
|
||||
|
||||
// Try to deep_copy device_dynamic_view directly to/from host.
|
||||
// host-to-device currently fails to compile because DP and SP are
|
||||
// swapped in the deep_copy implementation.
|
||||
// Once that's fixed, both deep_copy's will fail at runtime because the
|
||||
// destination execution space cannot access the source memory space.
|
||||
try {
|
||||
Kokkos::deep_copy(host_view, device_dynamic_view);
|
||||
} catch (std::runtime_error const& error) {
|
||||
std::string msg = error.what();
|
||||
std::cerr << "Copy from on-device DynamicView to on-host View failed:\n"
|
||||
<< msg << std::endl;
|
||||
}
|
||||
try {
|
||||
Kokkos::deep_copy(device_dynamic_view, host_view);
|
||||
} catch (std::runtime_error const& error) {
|
||||
std::string msg = error.what();
|
||||
std::cerr << "Copy from on-host View to on-device DynamicView failed:\n"
|
||||
<< msg << std::endl;
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
@ -73,7 +73,7 @@ void textcolor(int attr, int fg, int bg) {
|
||||
char command[40];
|
||||
|
||||
/* Command is the control command to the terminal */
|
||||
sprintf(command, "%c[%d;%d;%dm", 0x1B, attr, fg + 30, bg + 40);
|
||||
snprintf(command, 40, "%c[%d;%d;%dm", 0x1B, attr, fg + 30, bg + 40);
|
||||
printf("%s", command);
|
||||
}
|
||||
void textcolor_standard() { textcolor(RESET, BLACK, WHITE); }
|
||||
|
||||
@ -710,7 +710,7 @@ struct CudaParallelLaunchImpl<
|
||||
" occupancy requests are currently broken."));
|
||||
}
|
||||
|
||||
KOKKOS_ENSURE_CUDA_LOCK_ARRAYS_ON_DEVICE();
|
||||
ensure_cuda_lock_arrays_on_device();
|
||||
|
||||
// Invoke the driver function on the device
|
||||
base_t::invoke_kernel(driver, grid, block, shmem, cuda_instance);
|
||||
|
||||
@ -79,8 +79,7 @@ CudaLockArrays g_host_cuda_lock_arrays = {nullptr, 0};
|
||||
void initialize_host_cuda_lock_arrays() {
|
||||
#ifdef KOKKOS_ENABLE_IMPL_DESUL_ATOMICS
|
||||
desul::Impl::init_lock_arrays();
|
||||
|
||||
DESUL_ENSURE_CUDA_LOCK_ARRAYS_ON_DEVICE();
|
||||
desul::ensure_cuda_lock_arrays_on_device();
|
||||
#endif
|
||||
if (g_host_cuda_lock_arrays.atomic != nullptr) return;
|
||||
KOKKOS_IMPL_CUDA_SAFE_CALL(
|
||||
@ -89,7 +88,7 @@ void initialize_host_cuda_lock_arrays() {
|
||||
Impl::cuda_device_synchronize(
|
||||
"Kokkos::Impl::initialize_host_cuda_lock_arrays: Pre Init Lock Arrays");
|
||||
g_host_cuda_lock_arrays.n = Cuda::concurrency();
|
||||
KOKKOS_COPY_CUDA_LOCK_ARRAYS_TO_DEVICE();
|
||||
copy_cuda_lock_arrays_to_device();
|
||||
init_lock_array_kernel_atomic<<<(CUDA_SPACE_ATOMIC_MASK + 1 + 255) / 256,
|
||||
256>>>();
|
||||
Impl::cuda_device_synchronize(
|
||||
@ -106,7 +105,7 @@ void finalize_host_cuda_lock_arrays() {
|
||||
g_host_cuda_lock_arrays.atomic = nullptr;
|
||||
g_host_cuda_lock_arrays.n = 0;
|
||||
#ifdef KOKKOS_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE
|
||||
KOKKOS_COPY_CUDA_LOCK_ARRAYS_TO_DEVICE();
|
||||
copy_cuda_lock_arrays_to_device();
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
@ -67,7 +67,7 @@ struct CudaLockArrays {
|
||||
|
||||
/// \brief This global variable in Host space is the central definition
|
||||
/// of these arrays.
|
||||
extern Kokkos::Impl::CudaLockArrays g_host_cuda_lock_arrays;
|
||||
extern CudaLockArrays g_host_cuda_lock_arrays;
|
||||
|
||||
/// \brief After this call, the g_host_cuda_lock_arrays variable has
|
||||
/// valid, initialized arrays.
|
||||
@ -105,12 +105,12 @@ namespace Impl {
|
||||
/// instances in other translation units, we must update this CUDA global
|
||||
/// variable based on the Host global variable prior to running any kernels
|
||||
/// that will use it.
|
||||
/// That is the purpose of the KOKKOS_ENSURE_CUDA_LOCK_ARRAYS_ON_DEVICE macro.
|
||||
/// That is the purpose of the ensure_cuda_lock_arrays_on_device function.
|
||||
__device__
|
||||
#ifdef KOKKOS_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE
|
||||
__constant__ extern
|
||||
#endif
|
||||
Kokkos::Impl::CudaLockArrays g_device_cuda_lock_arrays;
|
||||
CudaLockArrays g_device_cuda_lock_arrays;
|
||||
|
||||
#define CUDA_SPACE_ATOMIC_MASK 0x1FFFF
|
||||
|
||||
@ -123,9 +123,7 @@ __device__ inline bool lock_address_cuda_space(void* ptr) {
|
||||
size_t offset = size_t(ptr);
|
||||
offset = offset >> 2;
|
||||
offset = offset & CUDA_SPACE_ATOMIC_MASK;
|
||||
return (
|
||||
0 ==
|
||||
atomicCAS(&Kokkos::Impl::g_device_cuda_lock_arrays.atomic[offset], 0, 1));
|
||||
return (0 == atomicCAS(&g_device_cuda_lock_arrays.atomic[offset], 0, 1));
|
||||
}
|
||||
|
||||
/// \brief Release lock for the address
|
||||
@ -138,7 +136,7 @@ __device__ inline void unlock_address_cuda_space(void* ptr) {
|
||||
size_t offset = size_t(ptr);
|
||||
offset = offset >> 2;
|
||||
offset = offset & CUDA_SPACE_ATOMIC_MASK;
|
||||
atomicExch(&Kokkos::Impl::g_device_cuda_lock_arrays.atomic[offset], 0);
|
||||
atomicExch(&g_device_cuda_lock_arrays.atomic[offset], 0);
|
||||
}
|
||||
|
||||
} // namespace Impl
|
||||
@ -151,45 +149,49 @@ namespace {
|
||||
static int lock_array_copied = 0;
|
||||
inline int eliminate_warning_for_lock_array() { return lock_array_copied; }
|
||||
} // namespace
|
||||
} // namespace Impl
|
||||
} // namespace Kokkos
|
||||
|
||||
/* Dan Ibanez: it is critical that this code be a macro, so that it will
|
||||
capture the right address for Kokkos::Impl::g_device_cuda_lock_arrays!
|
||||
putting this in an inline function will NOT do the right thing! */
|
||||
#define KOKKOS_COPY_CUDA_LOCK_ARRAYS_TO_DEVICE() \
|
||||
{ \
|
||||
if (::Kokkos::Impl::lock_array_copied == 0) { \
|
||||
KOKKOS_IMPL_CUDA_SAFE_CALL( \
|
||||
cudaMemcpyToSymbol(Kokkos::Impl::g_device_cuda_lock_arrays, \
|
||||
&Kokkos::Impl::g_host_cuda_lock_arrays, \
|
||||
sizeof(Kokkos::Impl::CudaLockArrays))); \
|
||||
} \
|
||||
lock_array_copied = 1; \
|
||||
#ifdef KOKKOS_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE
|
||||
inline
|
||||
#else
|
||||
inline static
|
||||
#endif
|
||||
void
|
||||
copy_cuda_lock_arrays_to_device() {
|
||||
if (lock_array_copied == 0) {
|
||||
KOKKOS_IMPL_CUDA_SAFE_CALL(cudaMemcpyToSymbol(g_device_cuda_lock_arrays,
|
||||
&g_host_cuda_lock_arrays,
|
||||
sizeof(CudaLockArrays)));
|
||||
}
|
||||
lock_array_copied = 1;
|
||||
}
|
||||
|
||||
#ifndef KOKKOS_ENABLE_IMPL_DESUL_ATOMICS
|
||||
|
||||
#ifdef KOKKOS_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE
|
||||
#define KOKKOS_ENSURE_CUDA_LOCK_ARRAYS_ON_DEVICE()
|
||||
inline void ensure_cuda_lock_arrays_on_device() {}
|
||||
#else
|
||||
#define KOKKOS_ENSURE_CUDA_LOCK_ARRAYS_ON_DEVICE() \
|
||||
KOKKOS_COPY_CUDA_LOCK_ARRAYS_TO_DEVICE()
|
||||
inline static void ensure_cuda_lock_arrays_on_device() {
|
||||
copy_cuda_lock_arrays_to_device();
|
||||
}
|
||||
#endif
|
||||
|
||||
#else
|
||||
|
||||
#ifdef KOKKOS_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE
|
||||
#define KOKKOS_ENSURE_CUDA_LOCK_ARRAYS_ON_DEVICE()
|
||||
inline void ensure_cuda_lock_arrays_on_device() {}
|
||||
#else
|
||||
// Still Need COPY_CUDA_LOCK_ARRAYS for team scratch etc.
|
||||
#define KOKKOS_ENSURE_CUDA_LOCK_ARRAYS_ON_DEVICE() \
|
||||
KOKKOS_COPY_CUDA_LOCK_ARRAYS_TO_DEVICE() \
|
||||
DESUL_ENSURE_CUDA_LOCK_ARRAYS_ON_DEVICE()
|
||||
inline static void ensure_cuda_lock_arrays_on_device() {
|
||||
copy_cuda_lock_arrays_to_device();
|
||||
desul::ensure_cuda_lock_arrays_on_device();
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* defined( KOKKOS_ENABLE_IMPL_DESUL_ATOMICS ) */
|
||||
|
||||
} // namespace Impl
|
||||
} // namespace Kokkos
|
||||
|
||||
#endif /* defined( KOKKOS_ENABLE_CUDA ) */
|
||||
|
||||
#endif /* #ifndef KOKKOS_CUDA_LOCKS_HPP */
|
||||
|
||||
@ -428,6 +428,8 @@ void HIPInternal::finalize() {
|
||||
|
||||
if (this == &singleton()) {
|
||||
(void)Kokkos::Impl::hip_global_unique_token_locks(true);
|
||||
Kokkos::Impl::finalize_host_hip_lock_arrays();
|
||||
|
||||
KOKKOS_IMPL_HIP_SAFE_CALL(hipHostFree(constantMemHostStaging));
|
||||
KOKKOS_IMPL_HIP_SAFE_CALL(hipEventDestroy(constantMemReusable));
|
||||
}
|
||||
|
||||
@ -228,11 +228,6 @@
|
||||
#define KOKKOS_ENABLE_PRAGMA_SIMD 1
|
||||
#endif
|
||||
|
||||
// FIXME Workaround for ICE with intel 17,18,19,20,21 in Trilinos
|
||||
#if (KOKKOS_COMPILER_INTEL <= 2100)
|
||||
#define KOKKOS_IMPL_WORKAROUND_ICE_IN_TRILINOS_WITH_OLD_INTEL_COMPILERS
|
||||
#endif
|
||||
|
||||
// FIXME_SYCL
|
||||
#if !defined(KOKKOS_ENABLE_SYCL)
|
||||
#define KOKKOS_ENABLE_PRAGMA_IVDEP 1
|
||||
@ -653,7 +648,8 @@ static constexpr bool kokkos_omp_on_host() { return false; }
|
||||
#if (defined(KOKKOS_COMPILER_GNU) || defined(KOKKOS_COMPILER_CLANG) || \
|
||||
defined(KOKKOS_COMPILER_INTEL) || defined(KOKKOS_COMPILER_PGI)) && \
|
||||
!defined(_WIN32)
|
||||
#if (!defined(__linux__) || defined(__GLIBC_MINOR__))
|
||||
// disable stacktrace for musl-libc
|
||||
#if !defined(__linux__) || defined(__GLIBC_MINOR__)
|
||||
#define KOKKOS_IMPL_ENABLE_STACKTRACE
|
||||
#endif
|
||||
#define KOKKOS_IMPL_ENABLE_CXXABI
|
||||
|
||||
@ -73,9 +73,8 @@ class ScratchMemorySpace {
|
||||
"Instantiating ScratchMemorySpace on non-execution-space type.");
|
||||
|
||||
public:
|
||||
// Alignment of memory chunks returned by 'get'
|
||||
// must be a power of two
|
||||
enum { ALIGN = 8 };
|
||||
// Minimal overalignment used by view scratch allocations
|
||||
constexpr static int ALIGN = 8;
|
||||
|
||||
private:
|
||||
mutable char* m_iter_L0 = nullptr;
|
||||
@ -87,7 +86,9 @@ class ScratchMemorySpace {
|
||||
mutable int m_offset = 0;
|
||||
mutable int m_default_level = 0;
|
||||
|
||||
enum { MASK = ALIGN - 1 }; // Alignment used by View::shmem_size
|
||||
#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_4
|
||||
constexpr static int DEFAULT_ALIGNMENT_MASK = ALIGN - 1;
|
||||
#endif
|
||||
|
||||
public:
|
||||
//! Tag this class as a memory space
|
||||
@ -101,39 +102,59 @@ class ScratchMemorySpace {
|
||||
|
||||
static constexpr const char* name() { return "ScratchMemorySpace"; }
|
||||
|
||||
#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_4
|
||||
// This function is unused
|
||||
template <typename IntType>
|
||||
KOKKOS_INLINE_FUNCTION static IntType align(const IntType& size) {
|
||||
return (size + MASK) & ~MASK;
|
||||
KOKKOS_DEPRECATED KOKKOS_INLINE_FUNCTION static constexpr IntType align(
|
||||
const IntType& size) {
|
||||
return (size + DEFAULT_ALIGNMENT_MASK) & ~DEFAULT_ALIGNMENT_MASK;
|
||||
}
|
||||
#endif
|
||||
|
||||
template <typename IntType>
|
||||
KOKKOS_INLINE_FUNCTION void* get_shmem(const IntType& size,
|
||||
int level = -1) const {
|
||||
return get_shmem_common</*aligned*/ false>(size, 1, level);
|
||||
return get_shmem_common</*alignment_requested*/ false>(size, 1, level);
|
||||
}
|
||||
|
||||
template <typename IntType>
|
||||
KOKKOS_INLINE_FUNCTION void* get_shmem_aligned(const IntType& size,
|
||||
const ptrdiff_t alignment,
|
||||
int level = -1) const {
|
||||
return get_shmem_common</*aligned*/ true>(size, alignment, level);
|
||||
return get_shmem_common</*alignment_requested*/ true>(size, alignment,
|
||||
level);
|
||||
}
|
||||
|
||||
private:
|
||||
template <bool aligned, typename IntType>
|
||||
template <bool alignment_requested, typename IntType>
|
||||
KOKKOS_INLINE_FUNCTION void* get_shmem_common(const IntType& size,
|
||||
const ptrdiff_t alignment,
|
||||
int level = -1) const {
|
||||
if (level == -1) level = m_default_level;
|
||||
auto& m_iter = (level == 0) ? m_iter_L0 : m_iter_L1;
|
||||
auto& m_end = (level == 0) ? m_end_L0 : m_end_L1;
|
||||
char* previous = m_iter;
|
||||
const ptrdiff_t missalign = size_t(m_iter) % alignment;
|
||||
if (missalign) m_iter += alignment - missalign;
|
||||
auto& m_iter = (level == 0) ? m_iter_L0 : m_iter_L1;
|
||||
auto& m_end = (level == 0) ? m_end_L0 : m_end_L1;
|
||||
|
||||
void* tmp = m_iter + m_offset * (aligned ? size : align(size));
|
||||
if (m_end < (m_iter += (aligned ? size : align(size)) * m_multiplier)) {
|
||||
m_iter = previous; // put it back like it was
|
||||
if (alignment_requested) {
|
||||
const ptrdiff_t missalign = size_t(m_iter) % alignment;
|
||||
if (missalign) m_iter += alignment - missalign;
|
||||
}
|
||||
|
||||
// This is each thread's start pointer for its allocation
|
||||
// Note: for team scratch m_offset is 0, since every
|
||||
// thread will get back the same shared pointer
|
||||
void* tmp = m_iter + m_offset * size;
|
||||
ptrdiff_t increment = size * m_multiplier;
|
||||
|
||||
// increment m_iter first and decrement it again if not
|
||||
// enough memory was available. In the non-failing path
|
||||
// this will save instructions.
|
||||
m_iter += increment;
|
||||
|
||||
if (m_end < m_iter) {
|
||||
// Request did overflow: reset the base team ptr, and
|
||||
// return nullptr
|
||||
m_iter -= increment;
|
||||
tmp = nullptr;
|
||||
#ifdef KOKKOS_ENABLE_DEBUG
|
||||
// mfh 23 Jun 2015: printf call consumes 25 registers
|
||||
// in a CUDA build, so only print in debug mode. The
|
||||
@ -143,7 +164,6 @@ class ScratchMemorySpace {
|
||||
"%ld byte(s); remaining capacity is %ld byte(s)\n",
|
||||
long(size), long(m_end - m_iter));
|
||||
#endif // KOKKOS_ENABLE_DEBUG
|
||||
tmp = nullptr;
|
||||
}
|
||||
return tmp;
|
||||
}
|
||||
|
||||
@ -203,19 +203,11 @@ class Serial {
|
||||
static const char* name();
|
||||
|
||||
Impl::SerialInternal* impl_internal_space_instance() const {
|
||||
#ifdef KOKKOS_IMPL_WORKAROUND_ICE_IN_TRILINOS_WITH_OLD_INTEL_COMPILERS
|
||||
return m_space_instance;
|
||||
#else
|
||||
return m_space_instance.get();
|
||||
#endif
|
||||
}
|
||||
|
||||
private:
|
||||
#ifdef KOKKOS_IMPL_WORKAROUND_ICE_IN_TRILINOS_WITH_OLD_INTEL_COMPILERS
|
||||
Impl::SerialInternal* m_space_instance;
|
||||
#else
|
||||
Kokkos::Impl::HostSharedPtr<Impl::SerialInternal> m_space_instance;
|
||||
#endif
|
||||
//--------------------------------------------------------------------------
|
||||
};
|
||||
|
||||
|
||||
@ -67,6 +67,8 @@ KOKKOS_IMPL_WARNING("Including non-public Kokkos header files is not allowed.")
|
||||
|
||||
#include <impl/Kokkos_Tools.hpp>
|
||||
|
||||
#include <Kokkos_MinMaxClamp.hpp>
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
@ -1692,19 +1694,27 @@ class View : public ViewTraits<DataType, Properties...> {
|
||||
arg_N0, arg_N1, arg_N2, arg_N3, arg_N4, arg_N5, arg_N6, arg_N7));
|
||||
}
|
||||
|
||||
private:
|
||||
// Want to be able to align to minimum scratch alignment or sizeof or alignof
|
||||
// elements
|
||||
static constexpr size_t scratch_value_alignment =
|
||||
::Kokkos::max(::Kokkos::max(sizeof(typename traits::value_type),
|
||||
alignof(typename traits::value_type)),
|
||||
static_cast<size_t>(
|
||||
traits::execution_space::scratch_memory_space::ALIGN));
|
||||
|
||||
public:
|
||||
static KOKKOS_INLINE_FUNCTION size_t
|
||||
shmem_size(typename traits::array_layout const& arg_layout) {
|
||||
return map_type::memory_span(arg_layout) +
|
||||
sizeof(typename traits::value_type);
|
||||
return map_type::memory_span(arg_layout) + scratch_value_alignment;
|
||||
}
|
||||
|
||||
explicit KOKKOS_INLINE_FUNCTION View(
|
||||
const typename traits::execution_space::scratch_memory_space& arg_space,
|
||||
const typename traits::array_layout& arg_layout)
|
||||
: View(Impl::ViewCtorProp<pointer_type>(
|
||||
reinterpret_cast<pointer_type>(arg_space.get_shmem_aligned(
|
||||
map_type::memory_span(arg_layout),
|
||||
sizeof(typename traits::value_type)))),
|
||||
: View(Impl::ViewCtorProp<pointer_type>(reinterpret_cast<pointer_type>(
|
||||
arg_space.get_shmem_aligned(map_type::memory_span(arg_layout),
|
||||
scratch_value_alignment))),
|
||||
arg_layout) {}
|
||||
|
||||
explicit KOKKOS_INLINE_FUNCTION View(
|
||||
@ -1722,7 +1732,7 @@ class View : public ViewTraits<DataType, Properties...> {
|
||||
map_type::memory_span(typename traits::array_layout(
|
||||
arg_N0, arg_N1, arg_N2, arg_N3, arg_N4, arg_N5, arg_N6,
|
||||
arg_N7)),
|
||||
sizeof(typename traits::value_type)))),
|
||||
scratch_value_alignment))),
|
||||
typename traits::array_layout(arg_N0, arg_N1, arg_N2, arg_N3,
|
||||
arg_N4, arg_N5, arg_N6, arg_N7),
|
||||
check_input_args::yes) {
|
||||
|
||||
@ -173,14 +173,8 @@ void SerialInternal::resize_thread_team_data(size_t pool_reduce_bytes,
|
||||
} // namespace Impl
|
||||
|
||||
Serial::Serial()
|
||||
#ifdef KOKKOS_IMPL_WORKAROUND_ICE_IN_TRILINOS_WITH_OLD_INTEL_COMPILERS
|
||||
: m_space_instance(&Impl::SerialInternal::singleton()) {
|
||||
}
|
||||
#else
|
||||
: m_space_instance(&Impl::SerialInternal::singleton(),
|
||||
[](Impl::SerialInternal*) {}) {
|
||||
}
|
||||
#endif
|
||||
[](Impl::SerialInternal*) {}) {}
|
||||
|
||||
void Serial::print_configuration(std::ostream& os, bool /*verbose*/) const {
|
||||
os << "Host Serial Execution Space:\n";
|
||||
|
||||
@ -63,11 +63,10 @@ class ParallelFor<FunctorType, Kokkos::MDRangePolicy<Traits...>,
|
||||
|
||||
const FunctorType m_functor;
|
||||
const MDRangePolicy m_mdr_policy;
|
||||
const Policy m_policy;
|
||||
|
||||
void exec() const {
|
||||
const typename Policy::member_type e = m_policy.end();
|
||||
for (typename Policy::member_type i = m_policy.begin(); i < e; ++i) {
|
||||
const typename Policy::member_type e = m_mdr_policy.m_num_tiles;
|
||||
for (typename Policy::member_type i = 0; i < e; ++i) {
|
||||
iterate_type(m_mdr_policy, m_functor)(i);
|
||||
}
|
||||
}
|
||||
@ -85,9 +84,7 @@ class ParallelFor<FunctorType, Kokkos::MDRangePolicy<Traits...>,
|
||||
}
|
||||
inline ParallelFor(const FunctorType& arg_functor,
|
||||
const MDRangePolicy& arg_policy)
|
||||
: m_functor(arg_functor),
|
||||
m_mdr_policy(arg_policy),
|
||||
m_policy(Policy(0, m_mdr_policy.m_num_tiles).set_chunk_size(1)) {}
|
||||
: m_functor(arg_functor), m_mdr_policy(arg_policy) {}
|
||||
};
|
||||
|
||||
template <class FunctorType, class ReducerType, class... Traits>
|
||||
@ -120,13 +117,12 @@ class ParallelReduce<FunctorType, Kokkos::MDRangePolicy<Traits...>, ReducerType,
|
||||
|
||||
const FunctorType m_functor;
|
||||
const MDRangePolicy m_mdr_policy;
|
||||
const Policy m_policy;
|
||||
const ReducerType m_reducer;
|
||||
const pointer_type m_result_ptr;
|
||||
|
||||
inline void exec(reference_type update) const {
|
||||
const typename Policy::member_type e = m_policy.end();
|
||||
for (typename Policy::member_type i = m_policy.begin(); i < e; ++i) {
|
||||
const typename Policy::member_type e = m_mdr_policy.m_num_tiles;
|
||||
for (typename Policy::member_type i = 0; i < e; ++i) {
|
||||
iterate_type(m_mdr_policy, m_functor, update)(i);
|
||||
}
|
||||
}
|
||||
@ -148,7 +144,8 @@ class ParallelReduce<FunctorType, Kokkos::MDRangePolicy<Traits...>, ReducerType,
|
||||
const size_t team_shared_size = 0; // Never shrinks
|
||||
const size_t thread_local_size = 0; // Never shrinks
|
||||
|
||||
auto* internal_instance = m_policy.space().impl_internal_space_instance();
|
||||
auto* internal_instance =
|
||||
m_mdr_policy.space().impl_internal_space_instance();
|
||||
// Need to lock resize_thread_team_data
|
||||
std::lock_guard<std::mutex> lock(
|
||||
internal_instance->m_thread_team_data_mutex);
|
||||
@ -181,7 +178,6 @@ class ParallelReduce<FunctorType, Kokkos::MDRangePolicy<Traits...>, ReducerType,
|
||||
void*> = nullptr)
|
||||
: m_functor(arg_functor),
|
||||
m_mdr_policy(arg_policy),
|
||||
m_policy(Policy(0, m_mdr_policy.m_num_tiles).set_chunk_size(1)),
|
||||
m_reducer(InvalidType()),
|
||||
m_result_ptr(arg_result_view.data()) {
|
||||
static_assert(Kokkos::is_view<HostViewType>::value,
|
||||
@ -197,7 +193,6 @@ class ParallelReduce<FunctorType, Kokkos::MDRangePolicy<Traits...>, ReducerType,
|
||||
MDRangePolicy arg_policy, const ReducerType& reducer)
|
||||
: m_functor(arg_functor),
|
||||
m_mdr_policy(arg_policy),
|
||||
m_policy(Policy(0, m_mdr_policy.m_num_tiles).set_chunk_size(1)),
|
||||
m_reducer(reducer),
|
||||
m_result_ptr(reducer.view().data()) {
|
||||
/*static_assert( std::is_same< typename ViewType::memory_space
|
||||
|
||||
@ -1165,6 +1165,5 @@ void _kokkos_pgi_compiler_bug_workaround() {}
|
||||
#endif
|
||||
} // namespace Kokkos
|
||||
|
||||
Kokkos::Impl::InitializationSettingsHelper<std::string>::storage_type const
|
||||
Kokkos::Impl::InitializationSettingsHelper<std::string>::unspecified =
|
||||
"some string we don't expect user would ever provide";
|
||||
constexpr char
|
||||
Kokkos::Impl::InitializationSettingsHelper<std::string>::unspecified[];
|
||||
|
||||
@ -104,7 +104,9 @@ struct InitializationSettingsHelper<std::string> {
|
||||
using value_type = std::string;
|
||||
using storage_type = std::string;
|
||||
|
||||
static storage_type const unspecified;
|
||||
// prefer c-string to avoid static initialization order nightmare
|
||||
static constexpr char unspecified[] =
|
||||
"some string we don't expect user would ever provide";
|
||||
};
|
||||
} // namespace Impl
|
||||
|
||||
|
||||
@ -655,9 +655,9 @@ void initialize(const std::string& profileLibrary) {
|
||||
|
||||
char* envProfileLibrary = const_cast<char*>(profileLibrary.c_str());
|
||||
|
||||
const auto envProfileCopy =
|
||||
std::make_unique<char[]>(strlen(envProfileLibrary) + 1);
|
||||
sprintf(envProfileCopy.get(), "%s", envProfileLibrary);
|
||||
const size_t envProfileLen = strlen(envProfileLibrary) + 1;
|
||||
const auto envProfileCopy = std::make_unique<char[]>(envProfileLen);
|
||||
snprintf(envProfileCopy.get(), envProfileLen, "%s", envProfileLibrary);
|
||||
|
||||
char* profileLibraryName = strtok(envProfileCopy.get(), ";");
|
||||
|
||||
|
||||
@ -69,9 +69,10 @@ char** init_kokkos_args(bool do_threads, bool do_numa, bool do_device,
|
||||
nargs = (do_threads ? 1 : 0) + (do_numa ? 1 : 0) + (do_device ? 1 : 0) +
|
||||
(do_other ? 4 : 0) + (do_tune ? 1 : 0);
|
||||
|
||||
char** args_kokkos = new char*[nargs];
|
||||
char** args_kokkos = new char*[nargs];
|
||||
const int max_args_size = 45;
|
||||
for (int i = 0; i < nargs; i++) {
|
||||
args_kokkos[i] = new char[45];
|
||||
args_kokkos[i] = new char[max_args_size];
|
||||
delete_these.insert(args_kokkos[i]);
|
||||
}
|
||||
|
||||
@ -112,7 +113,7 @@ char** init_kokkos_args(bool do_threads, bool do_numa, bool do_device,
|
||||
#endif
|
||||
|
||||
init_args.num_threads = nthreads;
|
||||
sprintf(args_kokkos[threads_idx], "--threads=%i", nthreads);
|
||||
snprintf(args_kokkos[threads_idx], max_args_size, "--threads=%i", nthreads);
|
||||
}
|
||||
|
||||
if (do_numa) {
|
||||
@ -130,24 +131,27 @@ char** init_kokkos_args(bool do_threads, bool do_numa, bool do_device,
|
||||
#endif
|
||||
|
||||
init_args.num_numa = numa;
|
||||
sprintf(args_kokkos[numa_idx], "--numa=%i", numa);
|
||||
snprintf(args_kokkos[numa_idx], max_args_size, "--numa=%i", numa);
|
||||
}
|
||||
|
||||
if (do_device) {
|
||||
init_args.device_id = 0;
|
||||
sprintf(args_kokkos[device_idx], "--device-id=%i", 0);
|
||||
snprintf(args_kokkos[device_idx], max_args_size, "--device-id=%i", 0);
|
||||
}
|
||||
|
||||
if (do_other) {
|
||||
sprintf(args_kokkos[0], "--dummyarg=1");
|
||||
sprintf(args_kokkos[threads_idx + (do_threads ? 1 : 0)], "--dummy2arg");
|
||||
sprintf(args_kokkos[threads_idx + (do_threads ? 1 : 0) + 1], "dummy3arg");
|
||||
sprintf(args_kokkos[device_idx + (do_device ? 1 : 0)], "dummy4arg=1");
|
||||
snprintf(args_kokkos[0], max_args_size, "--dummyarg=1");
|
||||
snprintf(args_kokkos[threads_idx + (do_threads ? 1 : 0)], max_args_size,
|
||||
"--dummy2arg");
|
||||
snprintf(args_kokkos[threads_idx + (do_threads ? 1 : 0) + 1], max_args_size,
|
||||
"dummy3arg");
|
||||
snprintf(args_kokkos[device_idx + (do_device ? 1 : 0)], max_args_size,
|
||||
"dummy4arg=1");
|
||||
}
|
||||
|
||||
if (do_tune) {
|
||||
init_args.tune_internals = true;
|
||||
sprintf(args_kokkos[tune_idx], "--kokkos-tune-internals");
|
||||
snprintf(args_kokkos[tune_idx], max_args_size, "--kokkos-tune-internals");
|
||||
}
|
||||
|
||||
return args_kokkos;
|
||||
|
||||
@ -91,7 +91,7 @@ void test_shared_alloc() {
|
||||
// Since always executed on host space, leave [=]
|
||||
Kokkos::parallel_for(range, [=](int i) {
|
||||
char name[64];
|
||||
sprintf(name, "test_%.2d", i);
|
||||
snprintf(name, 64, "test_%.2d", i);
|
||||
|
||||
r[i] = RecordMemS::allocate(s, name, size * (i + 1));
|
||||
h[i] = Header::get_header(r[i]->data());
|
||||
@ -135,7 +135,7 @@ void test_shared_alloc() {
|
||||
|
||||
Kokkos::parallel_for(range, [=](size_t i) {
|
||||
char name[64];
|
||||
sprintf(name, "test_%.2d", int(i));
|
||||
snprintf(name, 64, "test_%.2d", int(i));
|
||||
|
||||
RecordFull* rec = RecordFull::allocate(s, name, size * (i + 1));
|
||||
|
||||
|
||||
@ -1551,14 +1551,16 @@ struct TestScratchAlignment {
|
||||
double x, y, z;
|
||||
};
|
||||
TestScratchAlignment() {
|
||||
test(true);
|
||||
test(false);
|
||||
test_view(true);
|
||||
test_view(false);
|
||||
test_minimal();
|
||||
test_raw();
|
||||
}
|
||||
using ScratchView =
|
||||
Kokkos::View<TestScalar *, typename ExecSpace::scratch_memory_space>;
|
||||
using ScratchViewInt =
|
||||
Kokkos::View<int *, typename ExecSpace::scratch_memory_space>;
|
||||
void test(bool allocate_small) {
|
||||
void test_view(bool allocate_small) {
|
||||
int shmem_size = ScratchView::shmem_size(11);
|
||||
#ifdef KOKKOS_ENABLE_OPENMPTARGET
|
||||
int team_size =
|
||||
@ -1580,12 +1582,68 @@ struct TestScratchAlignment {
|
||||
});
|
||||
Kokkos::fence();
|
||||
}
|
||||
|
||||
void test_minimal() {
|
||||
using member_type = typename Kokkos::TeamPolicy<ExecSpace>::member_type;
|
||||
Kokkos::TeamPolicy<ExecSpace> policy(1, 1);
|
||||
size_t scratch_size = sizeof(int);
|
||||
Kokkos::View<int, ExecSpace> flag("Flag");
|
||||
|
||||
Kokkos::parallel_for(
|
||||
policy.set_scratch_size(0, Kokkos::PerTeam(scratch_size)),
|
||||
KOKKOS_LAMBDA(const member_type &team) {
|
||||
int *scratch_ptr = (int *)team.team_shmem().get_shmem(scratch_size);
|
||||
if (scratch_ptr == nullptr) flag() = 1;
|
||||
});
|
||||
Kokkos::fence();
|
||||
int minimal_scratch_allocation_failed = 0;
|
||||
Kokkos::deep_copy(minimal_scratch_allocation_failed, flag);
|
||||
ASSERT_TRUE(minimal_scratch_allocation_failed == 0);
|
||||
}
|
||||
|
||||
// Exercises the raw scratch interface with 1024 bytes of per-team level-0
// scratch: three get_shmem() requests followed by three get_shmem_aligned()
// requests. The device-side flag is raised whenever a pointer distance or an
// address alignment differs from the values checked below; the host asserts
// that the flag stayed at 0.
void test_raw() {
  using member_type = typename Kokkos::TeamPolicy<ExecSpace>::member_type;
  Kokkos::TeamPolicy<ExecSpace> policy(1, 1);
  Kokkos::View<int, ExecSpace> flag("Flag");

  Kokkos::parallel_for(
      policy.set_scratch_size(0, Kokkos::PerTeam(1024)),
      KOKKOS_LAMBDA(const member_type &team) {
        // Requests of 24, 32 and 12 bytes without an alignment argument.
        int *scratch_ptr1 = (int *)team.team_shmem().get_shmem(24);
        int *scratch_ptr2 = (int *)team.team_shmem().get_shmem(32);
        int *scratch_ptr3 = (int *)team.team_shmem().get_shmem(12);

        // Distances are measured in int elements
        // (6 and 8 correspond to 24 and 32 bytes assuming 4-byte int).
        const int plain_gap_1_2 = int(scratch_ptr2 - scratch_ptr1);
        const int plain_gap_2_3 = int(scratch_ptr3 - scratch_ptr2);
        if (plain_gap_1_2 != 6 || plain_gap_2_3 != 8) {
          flag() = 1;
        }

        // The size of the next request depends on the parity of the int
        // index three elements past scratch_ptr3.
        const bool odd_index =
            ((scratch_ptr3 - static_cast<int *>(nullptr)) + 3) % 2 == 1;
        if (odd_index) {
          scratch_ptr1 = (int *)team.team_shmem().get_shmem_aligned(24, 4);
        } else {
          scratch_ptr1 = (int *)team.team_shmem().get_shmem_aligned(12, 4);
        }
        scratch_ptr2 = (int *)team.team_shmem().get_shmem_aligned(32, 8);
        scratch_ptr3 = (int *)team.team_shmem().get_shmem_aligned(8, 4);

        // Either 7 or 4 ints between the first two aligned requests is
        // accepted; the last gap must be 8 ints.
        const int aligned_gap_1_2 = int(scratch_ptr2 - scratch_ptr1);
        if (aligned_gap_1_2 != 7 && aligned_gap_1_2 != 4) {
          flag() = 1;
        }
        if (int(scratch_ptr3 - scratch_ptr2) != 8) {
          flag() = 1;
        }

        // Each pointer must honour the alignment it was requested with.
        const bool misaligned = (int(size_t(scratch_ptr1) % 4) != 0) ||
                                (int(size_t(scratch_ptr2) % 8) != 0) ||
                                (int(size_t(scratch_ptr3) % 4) != 0);
        if (misaligned) {
          flag() = 1;
        }
      });
  Kokkos::fence();

  // Bring the flag back to the host before checking it.
  int raw_get_shmem_alignment_failed = 0;
  Kokkos::deep_copy(raw_get_shmem_alignment_failed, flag);
  ASSERT_TRUE(raw_get_shmem_alignment_failed == 0);
}
|
||||
};
|
||||
|
||||
} // namespace
|
||||
|
||||
namespace {
|
||||
|
||||
template <class ExecSpace>
|
||||
struct TestTeamPolicyHandleByValue {
|
||||
using scalar = double;
|
||||
|
||||
@ -59,9 +59,11 @@ __global__ void offset(int* p) {
|
||||
// Test whether allocations survive Kokkos initialize/finalize if done via Raw
|
||||
// Cuda.
|
||||
TEST(cuda, raw_cuda_interop) {
|
||||
// Make sure that we use the same device for all allocations
|
||||
Kokkos::initialize();
|
||||
|
||||
int* p;
|
||||
KOKKOS_IMPL_CUDA_SAFE_CALL(cudaMalloc(&p, sizeof(int) * 100));
|
||||
Kokkos::initialize();
|
||||
|
||||
Kokkos::View<int*, Kokkos::MemoryTraits<Kokkos::Unmanaged>> v(p, 100);
|
||||
Kokkos::deep_copy(v, 5);
|
||||
|
||||
@ -48,9 +48,11 @@
|
||||
namespace Test {
|
||||
// Test Interoperability with Cuda Streams
|
||||
TEST(cuda, raw_cuda_streams) {
|
||||
// Make sure that we use the same device for all allocations
|
||||
Kokkos::initialize();
|
||||
|
||||
cudaStream_t stream;
|
||||
cudaStreamCreate(&stream);
|
||||
Kokkos::initialize();
|
||||
int* p;
|
||||
cudaMalloc(&p, sizeof(int) * 100);
|
||||
using MemorySpace = typename TEST_EXECSPACE::memory_space;
|
||||
|
||||
@ -59,9 +59,11 @@ __global__ void offset(int* p) {
|
||||
// Test whether allocations survive Kokkos initialize/finalize if done via Raw
|
||||
// HIP.
|
||||
TEST(hip, raw_hip_interop) {
|
||||
// Make sure that we use the same device for all allocations
|
||||
Kokkos::initialize();
|
||||
|
||||
int* p;
|
||||
KOKKOS_IMPL_HIP_SAFE_CALL(hipMalloc(&p, sizeof(int) * 100));
|
||||
Kokkos::initialize();
|
||||
|
||||
Kokkos::View<int*, Kokkos::MemoryTraits<Kokkos::Unmanaged>> v(p, 100);
|
||||
Kokkos::deep_copy(v, 5);
|
||||
|
||||
@ -50,9 +50,11 @@ namespace Test {
|
||||
// The differences from the CUDA tests are: raw HIP vs raw CUDA and no launch
|
||||
// bound in HIP due to an error when computing the block size.
|
||||
TEST(hip, raw_hip_streams) {
|
||||
// Make sure that we use the same device for all allocations
|
||||
Kokkos::initialize();
|
||||
|
||||
hipStream_t stream;
|
||||
KOKKOS_IMPL_HIP_SAFE_CALL(hipStreamCreate(&stream));
|
||||
Kokkos::initialize();
|
||||
int* p;
|
||||
KOKKOS_IMPL_HIP_SAFE_CALL(hipMalloc(&p, sizeof(int) * 100));
|
||||
using MemorySpace = typename TEST_EXECSPACE::memory_space;
|
||||
|
||||
@ -52,8 +52,8 @@ namespace Test {
|
||||
// Test whether allocations survive Kokkos initialize/finalize if done via Raw
|
||||
// SYCL.
|
||||
TEST(sycl, raw_sycl_interop) {
|
||||
// Make sure all queues use the same context
|
||||
Kokkos::initialize();
|
||||
|
||||
Kokkos::Experimental::SYCL default_space;
|
||||
sycl::context default_context = default_space.sycl_queue().get_context();
|
||||
|
||||
|
||||
@ -51,6 +51,7 @@ namespace Test {
|
||||
|
||||
// Test whether external allocations can be accessed by the default queue.
|
||||
TEST(sycl, raw_sycl_interop_context_1) {
|
||||
// Make sure all queues use the same context
|
||||
Kokkos::Experimental::SYCL default_space;
|
||||
sycl::context default_context = default_space.sycl_queue().get_context();
|
||||
|
||||
|
||||
@ -48,9 +48,13 @@
|
||||
namespace Test {
|
||||
// Test interoperability with SYCL queues
|
||||
TEST(sycl, raw_sycl_queues) {
|
||||
sycl::default_selector device_selector;
|
||||
sycl::queue queue(device_selector);
|
||||
// Make sure all queues use the same context
|
||||
Kokkos::initialize();
|
||||
Kokkos::Experimental::SYCL default_space;
|
||||
sycl::context default_context = default_space.sycl_queue().get_context();
|
||||
|
||||
sycl::default_selector device_selector;
|
||||
sycl::queue queue(default_context, device_selector);
|
||||
int* p = sycl::malloc_device<int>(100, queue);
|
||||
using MemorySpace = typename TEST_EXECSPACE::memory_space;
|
||||
|
||||
|
||||
@ -76,7 +76,7 @@ namespace Impl {
|
||||
/// instances in other translation units, we must update this CUDA global
|
||||
/// variable based on the Host global variable prior to running any kernels
|
||||
/// that will use it.
|
||||
/// That is the purpose of the KOKKOS_ENSURE_CUDA_LOCK_ARRAYS_ON_DEVICE macro.
|
||||
/// That is the purpose of the ensure_cuda_lock_arrays_on_device function.
|
||||
__device__
|
||||
#ifdef __CUDACC_RDC__
|
||||
__constant__ extern
|
||||
@ -138,33 +138,42 @@ namespace {
|
||||
static int lock_array_copied = 0;
|
||||
inline int eliminate_warning_for_lock_array() { return lock_array_copied; }
|
||||
} // namespace
|
||||
|
||||
#ifdef __CUDACC_RDC__
|
||||
inline
|
||||
#else
|
||||
inline static
|
||||
#endif
|
||||
void
|
||||
copy_cuda_lock_arrays_to_device() {
|
||||
if (lock_array_copied == 0) {
|
||||
cudaMemcpyToSymbol(CUDA_SPACE_ATOMIC_LOCKS_DEVICE,
|
||||
&CUDA_SPACE_ATOMIC_LOCKS_DEVICE_h,
|
||||
sizeof(int32_t*));
|
||||
cudaMemcpyToSymbol(CUDA_SPACE_ATOMIC_LOCKS_NODE,
|
||||
&CUDA_SPACE_ATOMIC_LOCKS_NODE_h,
|
||||
sizeof(int32_t*));
|
||||
}
|
||||
lock_array_copied = 1;
|
||||
}
|
||||
|
||||
} // namespace Impl
|
||||
} // namespace desul
|
||||
/* It is critical that this code be a macro, so that it will
|
||||
capture the right address for desul::Impl::CUDA_SPACE_ATOMIC_LOCKS_DEVICE
|
||||
putting this in an inline function will NOT do the right thing! */
|
||||
#define DESUL_IMPL_COPY_CUDA_LOCK_ARRAYS_TO_DEVICE() \
|
||||
{ \
|
||||
if (::desul::Impl::lock_array_copied == 0) { \
|
||||
cudaMemcpyToSymbol(::desul::Impl::CUDA_SPACE_ATOMIC_LOCKS_DEVICE, \
|
||||
&::desul::Impl::CUDA_SPACE_ATOMIC_LOCKS_DEVICE_h, \
|
||||
sizeof(int32_t*)); \
|
||||
cudaMemcpyToSymbol(::desul::Impl::CUDA_SPACE_ATOMIC_LOCKS_NODE, \
|
||||
&::desul::Impl::CUDA_SPACE_ATOMIC_LOCKS_NODE_h, \
|
||||
sizeof(int32_t*)); \
|
||||
} \
|
||||
::desul::Impl::lock_array_copied = 1; \
|
||||
}
|
||||
|
||||
#endif /* defined( __CUDACC__ ) */
|
||||
|
||||
#endif /* defined( DESUL_HAVE_CUDA_ATOMICS ) */
|
||||
|
||||
namespace desul {
|
||||
|
||||
#if defined(__CUDACC_RDC__) || (!defined(__CUDACC__))
|
||||
#define DESUL_ENSURE_CUDA_LOCK_ARRAYS_ON_DEVICE()
|
||||
inline void ensure_cuda_lock_arrays_on_device() {}
|
||||
#else
|
||||
#define DESUL_ENSURE_CUDA_LOCK_ARRAYS_ON_DEVICE() \
|
||||
DESUL_IMPL_COPY_CUDA_LOCK_ARRAYS_TO_DEVICE()
|
||||
static inline void ensure_cuda_lock_arrays_on_device() {
|
||||
Impl::copy_cuda_lock_arrays_to_device();
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* #ifndef KOKKOS_CUDA_LOCKS_HPP_ */
|
||||
} // namespace desul
|
||||
|
||||
#endif /* #ifndef DESUL_ATOMICS_LOCK_ARRAY_CUDA_HPP_ */
|
||||
|
||||
@ -70,7 +70,7 @@ void init_lock_arrays_cuda() {
|
||||
"init_lock_arrays_cuda: cudaMalloc host locks");
|
||||
|
||||
auto error_sync1 = cudaDeviceSynchronize();
|
||||
DESUL_IMPL_COPY_CUDA_LOCK_ARRAYS_TO_DEVICE();
|
||||
copy_cuda_lock_arrays_to_device();
|
||||
check_error_and_throw_cuda(error_sync1, "init_lock_arrays_cuda: post mallocs");
|
||||
init_lock_arrays_cuda_kernel<<<(CUDA_SPACE_ATOMIC_MASK + 1 + 255) / 256, 256>>>();
|
||||
auto error_sync2 = cudaDeviceSynchronize();
|
||||
@ -85,7 +85,7 @@ void finalize_lock_arrays_cuda() {
|
||||
CUDA_SPACE_ATOMIC_LOCKS_DEVICE_h = nullptr;
|
||||
CUDA_SPACE_ATOMIC_LOCKS_NODE_h = nullptr;
|
||||
#ifdef __CUDACC_RDC__
|
||||
DESUL_IMPL_COPY_CUDA_LOCK_ARRAYS_TO_DEVICE();
|
||||
copy_cuda_lock_arrays_to_device();
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
1602
potentials/C_SNAP_2021.10.15.quadratic.snapcoeff
Normal file
1602
potentials/C_SNAP_2021.10.15.quadratic.snapcoeff
Normal file
File diff suppressed because it is too large
Load Diff
10
potentials/C_SNAP_2021.10.15.quadratic.snapparam
Normal file
10
potentials/C_SNAP_2021.10.15.quadratic.snapparam
Normal file
@ -0,0 +1,10 @@
|
||||
|
||||
# required
|
||||
rcutfac 2.7
|
||||
twojmax 8
|
||||
|
||||
# optional
|
||||
rfac0 0.99363
|
||||
rmin0 0.0
|
||||
bzeroflag 0
|
||||
quadraticflag 1
|
||||
@ -47,6 +47,7 @@ void FixEfieldTIP4P::init()
|
||||
if (atom->tag_enable == 0) error->all(FLERR, "Fix efield/tip4p requires atom IDs");
|
||||
if (!atom->q_flag) error->all(FLERR, "Fix efield/tip4p requires atom attribute q");
|
||||
if (!force->pair) error->all(FLERR, "A TIP4P pair style must be defined fix efield/tip4p");
|
||||
if (pstr) error->all(FLERR, "Fix efield/tip4p does not support the potential keyword");
|
||||
|
||||
int itmp;
|
||||
double *p_qdist = (double *) force->pair->extract("qdist", itmp);
|
||||
|
||||
@ -66,10 +66,10 @@ class FixACKS2ReaxFFKokkos : public FixACKS2ReaxFF, public KokkosBase {
|
||||
FixACKS2ReaxFFKokkos(class LAMMPS *, int, char **);
|
||||
~FixACKS2ReaxFFKokkos();
|
||||
|
||||
void init() override;
|
||||
void setup_pre_force(int) override;
|
||||
void pre_force(int) override;
|
||||
void cleanup_copy();
|
||||
void init();
|
||||
void setup_pre_force(int);
|
||||
void pre_force(int);
|
||||
|
||||
DAT::tdual_ffloat_1d get_s() {return k_s;}
|
||||
|
||||
@ -235,11 +235,11 @@ class FixACKS2ReaxFFKokkos : public FixACKS2ReaxFF, public KokkosBase {
|
||||
|
||||
void init_shielding_k();
|
||||
void init_hist();
|
||||
void allocate_matrix();
|
||||
void allocate_matrix() override;
|
||||
void allocate_array();
|
||||
void deallocate_array();
|
||||
int bicgstab_solve();
|
||||
void calculate_Q();
|
||||
void calculate_Q() override;
|
||||
|
||||
int neighflag;
|
||||
int nlocal,nall,nmax,newton_pair;
|
||||
@ -251,13 +251,13 @@ class FixACKS2ReaxFFKokkos : public FixACKS2ReaxFF, public KokkosBase {
|
||||
typename AT::t_int_2d d_sendlist;
|
||||
typename AT::t_xfloat_1d_um v_buf;
|
||||
|
||||
void grow_arrays(int);
|
||||
void copy_arrays(int, int, int);
|
||||
void grow_arrays(int) override;
|
||||
void copy_arrays(int, int, int) override;
|
||||
void sort_kokkos(Kokkos::BinSort<KeyViewType, BinOp> &Sorter) override;
|
||||
int pack_exchange(int, double *);
|
||||
int unpack_exchange(int, double *);
|
||||
void get_chi_field();
|
||||
double memory_usage();
|
||||
int pack_exchange(int, double *) override;
|
||||
int unpack_exchange(int, double *) override;
|
||||
void get_chi_field() override;
|
||||
double memory_usage() override;
|
||||
|
||||
void sparse_matvec_acks2(typename AT::t_ffloat_1d &, typename AT::t_ffloat_1d &);
|
||||
};
|
||||
|
||||
@ -2298,7 +2298,7 @@ void SNAKokkos<DeviceType, real_type, vector_length>::compute_s_dsfac(const real
|
||||
constexpr real_type zero = static_cast<real_type>(0.0);
|
||||
constexpr real_type onehalf = static_cast<real_type>(0.5);
|
||||
|
||||
if (switch_flag == 0) { sfac_outer = zero; dsfac_outer = zero; }
|
||||
if (switch_flag == 0) { sfac_outer = one; dsfac_outer = zero; }
|
||||
else if (switch_flag == 1) {
|
||||
if (r <= rmin0) { sfac_outer = one; dsfac_outer = zero; }
|
||||
else if (r > rcut) { sfac = zero; dsfac = zero; return; }
|
||||
|
||||
@ -404,9 +404,15 @@ void FixQEqReaxFF::init()
|
||||
efield->init();
|
||||
if (strcmp(update->unit_style,"real") != 0)
|
||||
error->all(FLERR,"Must use unit_style real with fix {} and external fields", style);
|
||||
if (efield->varflag != FixEfield::CONSTANT)
|
||||
error->all(FLERR,"Cannot (yet) use fix {} with variable efield", style);
|
||||
|
||||
if (efield->varflag == FixEfield::ATOM && efield->pstyle != FixEfield::ATOM)
|
||||
error->all(FLERR,"Atom-style external electric field requires atom-style "
|
||||
"potential variable when used with fix {}", style);
|
||||
if (((efield->xstyle != FixEfield::CONSTANT) && domain->xperiodic) ||
|
||||
((efield->ystyle != FixEfield::CONSTANT) && domain->yperiodic) ||
|
||||
((efield->zstyle != FixEfield::CONSTANT) && domain->zperiodic))
|
||||
error->all(FLERR,"Must not have electric field component in direction of periodic "
|
||||
"boundary when using charge equilibration with ReaxFF.");
|
||||
if (((fabs(efield->ex) > SMALL) && domain->xperiodic) ||
|
||||
((fabs(efield->ey) > SMALL) && domain->yperiodic) ||
|
||||
((fabs(efield->ez) > SMALL) && domain->zperiodic))
|
||||
@ -1101,26 +1107,36 @@ void FixQEqReaxFF::get_chi_field()
|
||||
|
||||
// efield energy is in real units of kcal/mol/angstrom, need to convert to eV
|
||||
|
||||
const double factor = -1.0/force->qe2f;
|
||||
const double qe2f = force->qe2f;
|
||||
const double factor = -1.0/qe2f;
|
||||
|
||||
|
||||
if (efield->varflag != FixEfield::CONSTANT)
|
||||
efield->update_efield_variables();
|
||||
|
||||
// currently we only support constant efield
|
||||
// atom selection is for the group of fix efield
|
||||
|
||||
if (efield->varflag == FixEfield::CONSTANT) {
|
||||
double unwrap[3];
|
||||
const double fx = efield->ex;
|
||||
const double fy = efield->ey;
|
||||
const double fz = efield->ez;
|
||||
const int efgroupbit = efield->groupbit;
|
||||
double unwrap[3];
|
||||
const double ex = efield->ex;
|
||||
const double ey = efield->ey;
|
||||
const double ez = efield->ez;
|
||||
const int efgroupbit = efield->groupbit;
|
||||
|
||||
// charge interactions
|
||||
// force = qE, potential energy = F dot x in unwrapped coords
|
||||
|
||||
if (efield->varflag != FixEfield::ATOM) {
|
||||
for (int i = 0; i < nlocal; i++) {
|
||||
if (mask[i] & efgroupbit) {
|
||||
if (region && !region->match(x[i][0],x[i][1],x[i][2])) continue;
|
||||
domain->unmap(x[i],image[i],unwrap);
|
||||
chi_field[i] = factor*(fx*unwrap[0] + fy*unwrap[1] + fz*unwrap[2]);
|
||||
chi_field[i] = factor*(ex*unwrap[0] + ey*unwrap[1] + ez*unwrap[2]);
|
||||
}
|
||||
}
|
||||
} else { // must use atom-style potential from FixEfield
|
||||
for (int i = 0; i < nlocal; i++) {
|
||||
if (mask[i] & efgroupbit) {
|
||||
if (region && !region->match(x[i][0],x[i][1],x[i][2])) continue;
|
||||
chi_field[i] = -efield->efield[i][3];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
16
src/atom.cpp
16
src/atom.cpp
@ -2129,14 +2129,18 @@ void Atom::add_molecule_atom(Molecule *onemol, int iatom, int ilocal, tagint off
|
||||
|
||||
// initialize custom per-atom properties to zero if present
|
||||
|
||||
for (int i = 0; i < nivector; ++i) ivector[i][ilocal] = 0;
|
||||
for (int i = 0; i < ndvector; ++i) dvector[i][ilocal] = 0.0;
|
||||
for (int i = 0; i < nivector; ++i)
|
||||
if (ivname[i]) ivector[i][ilocal] = 0;
|
||||
for (int i = 0; i < ndvector; ++i)
|
||||
if (dvname[i]) dvector[i][ilocal] = 0.0;
|
||||
for (int i = 0; i < niarray; ++i)
|
||||
for (int j = 0; j < icols[i]; ++j)
|
||||
iarray[i][ilocal][j] = 0;
|
||||
if (ianame[i])
|
||||
for (int j = 0; j < icols[i]; ++j)
|
||||
iarray[i][ilocal][j] = 0;
|
||||
for (int i = 0; i < ndarray; ++i)
|
||||
for (int j = 0; j < dcols[i]; ++j)
|
||||
darray[i][ilocal][j] = 0.0;
|
||||
if (daname[i])
|
||||
for (int j = 0; j < dcols[i]; ++j)
|
||||
darray[i][ilocal][j] = 0.0;
|
||||
|
||||
if (molecular != Atom::MOLECULAR) return;
|
||||
|
||||
|
||||
@ -41,7 +41,7 @@ using namespace FixConst;
|
||||
|
||||
FixEfield::FixEfield(LAMMPS *lmp, int narg, char **arg) :
|
||||
Fix(lmp, narg, arg), xstr(nullptr), ystr(nullptr), zstr(nullptr), estr(nullptr),
|
||||
idregion(nullptr), region(nullptr), efield(nullptr)
|
||||
pstr(nullptr), idregion(nullptr), region(nullptr), efield(nullptr)
|
||||
{
|
||||
if (narg < 6) utils::missing_cmd_args(FLERR, std::string("fix ") + style, error);
|
||||
|
||||
@ -58,7 +58,7 @@ FixEfield::FixEfield(LAMMPS *lmp, int narg, char **arg) :
|
||||
virial_global_flag = virial_peratom_flag = 1;
|
||||
|
||||
qe2f = force->qe2f;
|
||||
xstr = ystr = zstr = nullptr;
|
||||
xstyle = ystyle = zstyle = estyle = pstyle = NONE;
|
||||
|
||||
if (utils::strmatch(arg[3], "^v_")) {
|
||||
xstr = utils::strdup(arg[3] + 2);
|
||||
@ -100,11 +100,22 @@ FixEfield::FixEfield(LAMMPS *lmp, int narg, char **arg) :
|
||||
} else
|
||||
error->all(FLERR, "Unsupported argument for fix {} energy command: {}", style, arg[iarg]);
|
||||
iarg += 2;
|
||||
} else if (strcmp(arg[iarg], "potential") == 0) {
  // "potential" keyword: expects exactly one value of the form v_name.
  // The variable name, without its "v_" prefix, is stored in pstr.
  if (iarg + 2 > narg)
    utils::missing_cmd_args(FLERR, std::string("fix ") + style + " potential", error);
  if (utils::strmatch(arg[iarg + 1], "^v_")) {
    pstr = utils::strdup(arg[iarg + 1] + 2);
  } else {
    // name the right keyword and show the value that was rejected,
    // not the keyword itself
    error->all(FLERR, "Unsupported argument for fix {} potential command: {}", style,
               arg[iarg + 1]);
  }
  iarg += 2;
|
||||
} else {
|
||||
error->all(FLERR, "Unknown keyword for fix {} command: {}", style, arg[iarg]);
|
||||
}
|
||||
}
|
||||
|
||||
if (estr && pstr)
|
||||
error->all(FLERR, "Must not use energy and potential keywords at the same time with fix efield");
|
||||
|
||||
force_flag = 0;
|
||||
fsum[0] = fsum[1] = fsum[2] = fsum[3] = 0.0;
|
||||
|
||||
@ -122,6 +133,7 @@ FixEfield::~FixEfield()
|
||||
delete[] ystr;
|
||||
delete[] zstr;
|
||||
delete[] estr;
|
||||
delete[] pstr;
|
||||
delete[] idregion;
|
||||
memory->destroy(efield);
|
||||
}
|
||||
@ -157,43 +169,54 @@ void FixEfield::init()
|
||||
|
||||
if (xstr) {
|
||||
xvar = input->variable->find(xstr);
|
||||
if (xvar < 0) error->all(FLERR, "Variable {} for fix {} does not exist", xstr, style);
|
||||
if (xvar < 0) error->all(FLERR, "Variable {} for x-field in fix {} does not exist", xstr, style);
|
||||
if (input->variable->equalstyle(xvar))
|
||||
xstyle = EQUAL;
|
||||
else if (input->variable->atomstyle(xvar))
|
||||
xstyle = ATOM;
|
||||
else
|
||||
error->all(FLERR, "Variable {} for fix {} is invalid style", xstr, style);
|
||||
error->all(FLERR, "Variable {} for x-field in fix {} is invalid style", xstr, style);
|
||||
}
|
||||
|
||||
if (ystr) {
|
||||
yvar = input->variable->find(ystr);
|
||||
if (yvar < 0) error->all(FLERR, "Variable {} for fix {} does not exist", ystr, style);
|
||||
if (yvar < 0) error->all(FLERR, "Variable {} for y-field in fix {} does not exist", ystr, style);
|
||||
if (input->variable->equalstyle(yvar))
|
||||
ystyle = EQUAL;
|
||||
else if (input->variable->atomstyle(yvar))
|
||||
ystyle = ATOM;
|
||||
else
|
||||
error->all(FLERR, "Variable {} for fix {} is invalid style", ystr, style);
|
||||
error->all(FLERR, "Variable {} for y-field in fix {} is invalid style", ystr, style);
|
||||
}
|
||||
|
||||
if (zstr) {
|
||||
zvar = input->variable->find(zstr);
|
||||
if (zvar < 0) error->all(FLERR, "Variable {} for fix {} does not exist", zstr, style);
|
||||
if (zvar < 0) error->all(FLERR, "Variable {} for z-field in fix {} does not exist", zstr, style);
|
||||
if (input->variable->equalstyle(zvar))
|
||||
zstyle = EQUAL;
|
||||
else if (input->variable->atomstyle(zvar))
|
||||
zstyle = ATOM;
|
||||
else
|
||||
error->all(FLERR, "Variable {} for fix {} is invalid style", zstr, style);
|
||||
error->all(FLERR, "Variable {} for z-field in fix {} is invalid style", zstr, style);
|
||||
}
|
||||
|
||||
if (estr) {
|
||||
evar = input->variable->find(estr);
|
||||
if (evar < 0) error->all(FLERR, "Variable {} for fix {} does not exist", estr, style);
|
||||
if (evar < 0) error->all(FLERR, "Variable {} for energy in fix {} does not exist", estr, style);
|
||||
if (input->variable->atomstyle(evar))
|
||||
estyle = ATOM;
|
||||
else
|
||||
error->all(FLERR, "Variable {} for fix {} is invalid style", estr, style);
|
||||
} else
|
||||
estyle = NONE;
|
||||
error->all(FLERR, "Variable {} for energy in fix {} must be atom-style", estr, style);
|
||||
}
|
||||
|
||||
if (pstr) {
|
||||
pvar = input->variable->find(pstr);
|
||||
if (pvar < 0) error->all(FLERR, "Variable {} for potential in fix {} does not exist", pstr, style);
|
||||
if (input->variable->atomstyle(pvar))
|
||||
pstyle = ATOM;
|
||||
else
|
||||
error->all(FLERR, "Variable {} for potential in fix {} must be atom-style", pstr, style);
|
||||
}
|
||||
|
||||
// set index and check validity of region
|
||||
|
||||
@ -217,8 +240,10 @@ void FixEfield::init()
|
||||
|
||||
if (varflag == CONSTANT && estyle != NONE)
|
||||
error->all(FLERR, "Cannot use variable energy with constant efield in fix {}", style);
|
||||
if ((varflag == EQUAL || varflag == ATOM) && update->whichflag == 2 && estyle == NONE)
|
||||
error->all(FLERR, "Must use variable energy with fix {}", style);
|
||||
if (varflag == CONSTANT && pstyle != NONE)
|
||||
error->all(FLERR, "Cannot use variable potential with constant efield in fix {}", style);
|
||||
if ((varflag == EQUAL || varflag == ATOM) && update->whichflag == 2 && estyle == NONE && pstyle == NONE)
|
||||
error->all(FLERR, "Must use variable energy or potential with fix {} during minimization", style);
|
||||
|
||||
if (utils::strmatch(update->integrate_style, "^respa")) {
|
||||
ilevel_respa = (dynamic_cast<Respa *>(update->integrate))->nlevels - 1;
|
||||
@ -346,26 +371,7 @@ void FixEfield::post_force(int vflag)
|
||||
|
||||
} else {
|
||||
|
||||
modify->clearstep_compute();
|
||||
|
||||
if (xstyle == EQUAL) {
|
||||
ex = qe2f * input->variable->compute_equal(xvar);
|
||||
} else if (xstyle == ATOM) {
|
||||
input->variable->compute_atom(xvar, igroup, &efield[0][0], 4, 0);
|
||||
}
|
||||
if (ystyle == EQUAL) {
|
||||
ey = qe2f * input->variable->compute_equal(yvar);
|
||||
} else if (ystyle == ATOM) {
|
||||
input->variable->compute_atom(yvar, igroup, &efield[0][1], 4, 0);
|
||||
}
|
||||
if (zstyle == EQUAL) {
|
||||
ez = qe2f * input->variable->compute_equal(zvar);
|
||||
} else if (zstyle == ATOM) {
|
||||
input->variable->compute_atom(zvar, igroup, &efield[0][2], 4, 0);
|
||||
}
|
||||
if (estyle == ATOM) input->variable->compute_atom(evar, igroup, &efield[0][3], 4, 0);
|
||||
|
||||
modify->addstep_compute(update->ntimestep + 1);
|
||||
update_efield_variables();
|
||||
|
||||
// charge interactions
|
||||
// force = qE
|
||||
@ -395,7 +401,8 @@ void FixEfield::post_force(int vflag)
|
||||
}
|
||||
f[i][2] += fz;
|
||||
fsum[3] += fz;
|
||||
if (estyle == ATOM) fsum[0] += efield[i][3];
|
||||
if (pstyle == ATOM) fsum[0] += qe2f * q[i] * efield[i][3];
|
||||
else if (estyle == ATOM) fsum[0] += efield[i][3];
|
||||
}
|
||||
}
|
||||
|
||||
@ -470,3 +477,33 @@ double FixEfield::compute_vector(int n)
|
||||
}
|
||||
return fsum_all[n + 1];
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
update efield variables without doing anything else
|
||||
called by fix_qeq_reaxff
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void FixEfield::update_efield_variables()
|
||||
{
|
||||
modify->clearstep_compute();
|
||||
|
||||
if (xstyle == EQUAL) {
|
||||
ex = qe2f * input->variable->compute_equal(xvar);
|
||||
} else if (xstyle == ATOM) {
|
||||
input->variable->compute_atom(xvar, igroup, &efield[0][0], 4, 0);
|
||||
}
|
||||
if (ystyle == EQUAL) {
|
||||
ey = qe2f * input->variable->compute_equal(yvar);
|
||||
} else if (ystyle == ATOM) {
|
||||
input->variable->compute_atom(yvar, igroup, &efield[0][1], 4, 0);
|
||||
}
|
||||
if (zstyle == EQUAL) {
|
||||
ez = qe2f * input->variable->compute_equal(zvar);
|
||||
} else if (zstyle == ATOM) {
|
||||
input->variable->compute_atom(zvar, igroup, &efield[0][2], 4, 0);
|
||||
}
|
||||
if (pstyle == ATOM) input->variable->compute_atom(pvar, igroup, &efield[0][3], 4, 0);
|
||||
else if (estyle == ATOM) input->variable->compute_atom(evar, igroup, &efield[0][3], 4, 0);
|
||||
|
||||
modify->addstep_compute(update->ntimestep + 1);
|
||||
}
|
||||
|
||||
@ -46,10 +46,11 @@ class FixEfield : public Fix {
|
||||
protected:
|
||||
double ex, ey, ez;
|
||||
int varflag;
|
||||
char *xstr, *ystr, *zstr, *estr;
|
||||
char *xstr, *ystr, *zstr, *estr, *pstr;
|
||||
char *idregion;
|
||||
class Region *region;
|
||||
int xvar, yvar, zvar, evar, xstyle, ystyle, zstyle, estyle;
|
||||
int xvar, yvar, zvar, xstyle, ystyle, zstyle;
|
||||
int evar, pvar, estyle, pstyle;
|
||||
int ilevel_respa;
|
||||
double qe2f;
|
||||
int qflag, muflag;
|
||||
@ -59,6 +60,7 @@ class FixEfield : public Fix {
|
||||
|
||||
int force_flag;
|
||||
double fsum[4], fsum_all[4];
|
||||
void update_efield_variables();
|
||||
};
|
||||
} // namespace LAMMPS_NS
|
||||
#endif
|
||||
|
||||
@ -4040,8 +4040,8 @@ Region *Variable::region_function(char *id, int ivar)
|
||||
return 0 if not a match, 1 if successfully processed
|
||||
customize by adding a special function:
|
||||
sum(x),min(x),max(x),ave(x),trap(x),slope(x),
|
||||
gmask(x),rmask(x),grmask(x,y),next(x),
|
||||
is_file(x),is_ox(x),extract_setting(x),label2type(x,y)
|
||||
gmask(x),rmask(x),grmask(x,y),next(x),is_file(x),is_os(x),
|
||||
extract_setting(x),label2type(x,y),is_typelabel(x,y)
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
int Variable::special_function(char *word, char *contents, Tree **tree, Tree **treestack,
|
||||
@ -4056,20 +4056,28 @@ int Variable::special_function(char *word, char *contents, Tree **tree, Tree **t
|
||||
strcmp(word,"ave") != 0 && strcmp(word,"trap") != 0 && strcmp(word,"slope") != 0 &&
|
||||
strcmp(word,"gmask") != 0 && strcmp(word,"rmask") != 0 && strcmp(word,"grmask") != 0 &&
|
||||
strcmp(word,"next") != 0 && strcmp(word,"is_file") != 0 && strcmp(word,"is_os") != 0 &&
|
||||
strcmp(word,"extract_setting") != 0 && strcmp(word,"label2type") != 0)
|
||||
strcmp(word,"extract_setting") != 0 && strcmp(word,"label2type") != 0 &&
|
||||
strcmp(word,"is_typelabel") != 0)
|
||||
return 0;
|
||||
|
||||
// process label2type() and is_typelabel() separately b/c their label arg can have commas in it
|
||||
|
||||
if (strcmp(word,"label2type") == 0) {
|
||||
if (strcmp(word,"label2type") == 0 || strcmp(word,"is_typelabel") == 0) {
|
||||
if (!atom->labelmapflag)
|
||||
print_var_error(FLERR,"Cannot use label2type() function without a labelmap",ivar);
|
||||
print_var_error(FLERR,fmt::format("Cannot use {}() function without a labelmap",word),ivar);
|
||||
|
||||
std::string contents_copy(contents);
|
||||
auto pos = contents_copy.find_first_of(',');
|
||||
if (pos == std::string::npos)
|
||||
print_var_error(FLERR, fmt::format("Invalid label2type({}) function in variable formula",
|
||||
contents_copy), ivar);
|
||||
if (pos == std::string::npos) {
|
||||
if (strcmp(word,"label2type") == 0) {
|
||||
print_var_error(FLERR, fmt::format("Invalid label2type({}) function in variable formula",
|
||||
contents_copy), ivar);
|
||||
} else {
|
||||
print_var_error(FLERR, fmt::format("Invalid is_typelabel({}) function in variable formula",
|
||||
contents_copy), ivar);
|
||||
}
|
||||
}
|
||||
|
||||
std::string typestr = contents_copy.substr(pos+1);
|
||||
std::string kind = contents_copy.substr(0, pos);
|
||||
|
||||
@ -4085,12 +4093,14 @@ int Variable::special_function(char *word, char *contents, Tree **tree, Tree **t
|
||||
} else if (kind == "improper") {
|
||||
value = atom->lmap->find(typestr,Atom::IMPROPER);
|
||||
} else {
|
||||
print_var_error(FLERR, fmt::format("Invalid kind {} in label2type() in variable",kind),ivar);
|
||||
print_var_error(FLERR, fmt::format("Invalid kind {} in {}() in variable", kind, word),ivar);
|
||||
}
|
||||
|
||||
if (value == -1)
|
||||
print_var_error(FLERR, fmt::format("Invalid {} type label {} in label2type() in variable",
|
||||
kind, typestr), ivar);
|
||||
if (strcmp(word,"label2type") == 0) {
|
||||
if (value == -1)
|
||||
print_var_error(FLERR, fmt::format("Invalid {} type label {} in label2type() in variable",
|
||||
kind, typestr), ivar);
|
||||
} else value = (value == -1) ? 0.0 : 1.0;
|
||||
|
||||
// save value in tree or on argstack
|
||||
|
||||
|
||||
@ -590,7 +590,7 @@ TEST_F(VariableTest, NextCommand)
|
||||
command("next five four"););
|
||||
}
|
||||
|
||||
TEST_F(VariableTest, Label2TypeAtomic)
|
||||
TEST_F(VariableTest, LabelMapAtomic)
|
||||
{
|
||||
BEGIN_HIDE_OUTPUT();
|
||||
command("region box block 0 2 0 2 0 2");
|
||||
@ -608,14 +608,20 @@ TEST_F(VariableTest, Label2TypeAtomic)
|
||||
ASSERT_DOUBLE_EQ(variable->compute_equal("label2type(atom,N1)"), 2.0);
|
||||
ASSERT_DOUBLE_EQ(variable->compute_equal("label2type(atom,O1)"), 3.0);
|
||||
ASSERT_DOUBLE_EQ(variable->compute_equal("label2type(atom,H1)"), 4.0);
|
||||
ASSERT_DOUBLE_EQ(variable->compute_equal("is_typelabel(atom,N1)"), 1.0);
|
||||
ASSERT_DOUBLE_EQ(variable->compute_equal("is_typelabel(atom,N2)"), 0.0);
|
||||
ASSERT_DOUBLE_EQ(variable->compute_equal("is_typelabel(atom,O)"), 0.0);
|
||||
ASSERT_DOUBLE_EQ(variable->compute_equal("is_typelabel(atom,H1)"), 1.0);
|
||||
|
||||
TEST_FAILURE(".*ERROR: Variable t1: Invalid atom type label C1 in label2type.. in variable.*",
|
||||
command("print \"${t1}\""););
|
||||
TEST_FAILURE(".*ERROR: Invalid bond type label H1 in label2type.. in variable.*",
|
||||
variable->compute_equal("label2type(bond,H1)"););
|
||||
TEST_FAILURE(".*ERROR: Invalid kind xxx in label2type.. in variable.*",
|
||||
variable->compute_equal("label2type(xxx,H1)"););
|
||||
TEST_FAILURE(".*ERROR: Invalid kind xxx in is_typelabel.. in variable.*",
|
||||
variable->compute_equal("is_typelabel(xxx,H1)"););
|
||||
}
|
||||
|
||||
TEST_F(VariableTest, Label2TypeMolecular)
|
||||
TEST_F(VariableTest, LabelMapMolecular)
|
||||
{
|
||||
if (!info->has_style("atom", "full")) GTEST_SKIP();
|
||||
|
||||
@ -637,6 +643,14 @@ TEST_F(VariableTest, Label2TypeMolecular)
|
||||
command("variable a2 equal \"\"\"label2type(angle,N2'-C1\"-N2')\"\"\"");
|
||||
command("variable d1 equal label2type(dihedral,C1-N2-C1-N2)");
|
||||
command("variable i1 equal label2type(improper,C1-N2-C1-N2)");
|
||||
|
||||
command("variable l1 equal is_typelabel(atom,C2)+is_typelabel(bond,C2-N1)"
|
||||
"+is_typelabel(bond,[X1][Y1])+is_typelabel(angle,C1-C2-N1)"
|
||||
"+is_typelabel(dihedral,N2-C1-C1-N2)+is_typelabel(improper,N2-C1-C1-N2)");
|
||||
command("variable l2 equal is_typelabel(atom,C1)+is_typelabel(bond,C1-N2)"
|
||||
"+is_typelabel(bond,[C1][C1])+is_typelabel(angle,C1-N2-C1)"
|
||||
"+is_typelabel(dihedral,C1-N2-C1-N2)+is_typelabel(improper,C1-N2-C1-N2)");
|
||||
|
||||
END_HIDE_OUTPUT();
|
||||
|
||||
ASSERT_THAT(variable->retrieve("t1"), StrEq("1"));
|
||||
@ -647,6 +661,30 @@ TEST_F(VariableTest, Label2TypeMolecular)
|
||||
ASSERT_THAT(variable->retrieve("a2"), StrEq("2"));
|
||||
ASSERT_THAT(variable->retrieve("d1"), StrEq("1"));
|
||||
ASSERT_THAT(variable->retrieve("i1"), StrEq("1"));
|
||||
ASSERT_THAT(variable->retrieve("l1"), StrEq("0"));
|
||||
ASSERT_THAT(variable->retrieve("l2"), StrEq("6"));
|
||||
|
||||
ASSERT_DOUBLE_EQ(variable->compute_equal("is_typelabel(atom,N2')"), 1.0);
|
||||
ASSERT_DOUBLE_EQ(variable->compute_equal("is_typelabel(atom,\"N2'\")"), 0.0);
|
||||
ASSERT_DOUBLE_EQ(variable->compute_equal("is_typelabel(bond,C1-N2)"), 1.0);
|
||||
ASSERT_DOUBLE_EQ(variable->compute_equal("is_typelabel(bond,C2-N1)"), 0.0);
|
||||
ASSERT_DOUBLE_EQ(variable->compute_equal("is_typelabel(bond,[C1][C1])"), 1.0);
|
||||
ASSERT_DOUBLE_EQ(variable->compute_equal("is_typelabel(bond,[X1][Y1])"), 0.0);
|
||||
ASSERT_DOUBLE_EQ(variable->compute_equal("is_typelabel(angle,C1-C2-N1)"), 0.0);
|
||||
ASSERT_DOUBLE_EQ(variable->compute_equal("is_typelabel(angle,C1-N2-C1)"), 1.0);
|
||||
ASSERT_DOUBLE_EQ(variable->compute_equal("is_typelabel(dihedral,C1-N2-C1-N2)"), 1.0);
|
||||
ASSERT_DOUBLE_EQ(variable->compute_equal("is_typelabel(dihedral,N2-C1-C1-N2)"), 0.0);
|
||||
ASSERT_DOUBLE_EQ(variable->compute_equal("is_typelabel(improper,C1-N2-C1-N2)"), 1.0);
|
||||
ASSERT_DOUBLE_EQ(variable->compute_equal("is_typelabel(improper,N2-C1-C1-N2)"), 0.0);
|
||||
|
||||
TEST_FAILURE(".*ERROR: Invalid bond type label H1 in label2type.. in variable.*",
|
||||
variable->compute_equal("label2type(bond,H1)"););
|
||||
TEST_FAILURE(".*ERROR: Invalid angle type label H1 in label2type.. in variable.*",
|
||||
variable->compute_equal("label2type(angle,H1)"););
|
||||
TEST_FAILURE(".*ERROR: Invalid dihedral type label H1 in label2type.. in variable.*",
|
||||
variable->compute_equal("label2type(dihedral,H1)"););
|
||||
TEST_FAILURE(".*ERROR: Invalid improper type label H1 in label2type.. in variable.*",
|
||||
variable->compute_equal("label2type(improper,H1)"););
|
||||
}
|
||||
|
||||
TEST_F(VariableTest, Format)
|
||||
|
||||
Reference in New Issue
Block a user