Merge branch 'develop' into collected-small-changes

This commit is contained in:
Axel Kohlmeyer
2022-05-06 11:45:51 -04:00
15 changed files with 454 additions and 179 deletions

View File

@ -947,6 +947,12 @@ if(PKG_KSPACE)
else() else()
message(STATUS "Kokkos FFT: cuFFT") message(STATUS "Kokkos FFT: cuFFT")
endif() endif()
elseif(Kokkos_ENABLE_HIP)
if(FFT STREQUAL "KISS")
message(STATUS "Kokkos FFT: KISS")
else()
message(STATUS "Kokkos FFT: hipFFT")
endif()
else() else()
message(STATUS "Kokkos FFT: ${FFT}") message(STATUS "Kokkos FFT: ${FFT}")
endif() endif()

View File

@ -130,6 +130,11 @@ if(PKG_KSPACE)
target_compile_definitions(lammps PRIVATE -DFFT_CUFFT) target_compile_definitions(lammps PRIVATE -DFFT_CUFFT)
target_link_libraries(lammps PRIVATE cufft) target_link_libraries(lammps PRIVATE cufft)
endif() endif()
elseif(Kokkos_ENABLE_HIP)
if(NOT (FFT STREQUAL "KISS"))
target_compile_definitions(lammps PRIVATE -DFFT_HIPFFT)
target_link_libraries(lammps PRIVATE hipfft)
endif()
endif() endif()
endif() endif()

View File

@ -641,6 +641,20 @@ This list was last updated for version 3.5.0 of the Kokkos library.
-D CMAKE_CXX_COMPILER=${HOME}/lammps/lib/kokkos/bin/nvcc_wrapper -D CMAKE_CXX_COMPILER=${HOME}/lammps/lib/kokkos/bin/nvcc_wrapper
For AMD or NVIDIA GPUs using HIP, set these variables:
.. code-block:: bash
-D Kokkos_ARCH_HOSTARCH=yes # HOSTARCH = HOST from list above
-D Kokkos_ARCH_GPUARCH=yes # GPUARCH = GPU from list above
-D Kokkos_ENABLE_HIP=yes
-D Kokkos_ENABLE_OPENMP=yes
This will enable FFTs on the GPU, either by the internal KISSFFT library
or with the hipFFT wrapper library, which will call out to the
platform-appropriate vendor library: rocFFT on AMD GPUs or cuFFT on
NVIDIA GPUs.
To simplify compilation, four preset files are included in the To simplify compilation, four preset files are included in the
``cmake/presets`` folder, ``kokkos-serial.cmake``, ``cmake/presets`` folder, ``kokkos-serial.cmake``,
``kokkos-openmp.cmake``, ``kokkos-cuda.cmake``, and ``kokkos-openmp.cmake``, ``kokkos-cuda.cmake``, and
@ -707,6 +721,15 @@ This list was last updated for version 3.5.0 of the Kokkos library.
KOKKOS_ABSOLUTE_PATH = $(shell cd $(KOKKOS_PATH); pwd) KOKKOS_ABSOLUTE_PATH = $(shell cd $(KOKKOS_PATH); pwd)
CC = mpicxx -cxx=$(KOKKOS_ABSOLUTE_PATH)/config/nvcc_wrapper CC = mpicxx -cxx=$(KOKKOS_ABSOLUTE_PATH)/config/nvcc_wrapper
For AMD or NVIDIA GPUs using HIP:
.. code-block:: make
KOKKOS_DEVICES = HIP
KOKKOS_ARCH = HOSTARCH,GPUARCH # HOSTARCH = HOST from list above that is hosting the GPU
# GPUARCH = GPU from list above
FFT_INC = -DFFT_HIPFFT # enable use of hipFFT (optional)
FFT_LIB = -lhipfft # link to hipFFT library
Advanced KOKKOS compilation settings Advanced KOKKOS compilation settings
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

View File

@ -26,11 +26,11 @@ Syntax
region-ID = create atoms within this region, use NULL for entire simulation box region-ID = create atoms within this region, use NULL for entire simulation box
* zero or more keyword/value pairs may be appended * zero or more keyword/value pairs may be appended
* keyword = *mol* or *basis* or *ratio* or *subset* or *remap* or *var* or *set* or *rotate* or *units* * keyword = *mol* or *basis* or *ratio* or *subset* or *remap* or *var* or *set* or *rotate* or *overlap* or *maxtry* or *units*
.. parsed-literal:: .. parsed-literal::
*mol* value = template-ID seed *mol* values = template-ID seed
template-ID = ID of molecule template specified in a separate :doc:`molecule <molecule>` command template-ID = ID of molecule template specified in a separate :doc:`molecule <molecule>` command
seed = random # seed (positive integer) seed = random # seed (positive integer)
*basis* values = M itype *basis* values = M itype
@ -50,6 +50,10 @@ Syntax
*rotate* values = theta Rx Ry Rz *rotate* values = theta Rx Ry Rz
theta = rotation angle for single molecule (degrees) theta = rotation angle for single molecule (degrees)
Rx,Ry,Rz = rotation vector for single molecule Rx,Ry,Rz = rotation vector for single molecule
*overlap* value = Doverlap
Doverlap = only insert if at least this distance from all existing atoms
*maxtry* value = Ntry
Ntry = number of attempts to insert a particle before failure
*units* value = *lattice* or *box* *units* value = *lattice* or *box*
*lattice* = the geometry is defined in lattice units *lattice* = the geometry is defined in lattice units
*box* = the geometry is defined in simulation box units *box* = the geometry is defined in simulation box units
@ -64,20 +68,22 @@ Examples
create_atoms 3 region regsphere basis 2 3 ratio 0.5 74637 create_atoms 3 region regsphere basis 2 3 ratio 0.5 74637
create_atoms 3 single 0 0 5 create_atoms 3 single 0 0 5
create_atoms 1 box var v set x xpos set y ypos create_atoms 1 box var v set x xpos set y ypos
create_atoms 2 random 50 12345 NULL overlap 2.0 maxtry 50
Description Description
""""""""""" """""""""""
This command creates atoms (or molecules) on a lattice, or a single This command creates atoms (or molecules) within the simulation box,
atom (or molecule), or a random collection of atoms (or molecules), as either on a lattice, or a single atom (or molecule), or a random
an alternative to reading in their coordinates explicitly via a collection of atoms (or molecules). It is an alternative to reading
:doc:`read_data <read_data>` or :doc:`read_restart <read_restart>` in atom coordinates explicitly via a :doc:`read_data <read_data>` or
command. A simulation box must already exist, which is typically :doc:`read_restart <read_restart>` command. A simulation box must
created via the :doc:`create_box <create_box>` command. Before using already exist, which is typically created via the :doc:`create_box
this command, a lattice must also be defined using the <create_box>` command. Before using this command, a lattice must also
:doc:`lattice <lattice>` command, unless you specify the *single* style be defined using the :doc:`lattice <lattice>` command, unless you
with units = box or the *random* style. For the remainder of this doc specify the *single* style with units = box or the *random* style.
page, a created atom or molecule is referred to as a "particle". For the remainder of this doc page, a created atom or molecule is
referred to as a "particle".
If created particles are individual atoms, they are assigned the If created particles are individual atoms, they are assigned the
specified atom *type*, though this can be altered via the *basis* specified atom *type*, though this can be altered via the *basis*
@ -97,58 +103,70 @@ particular dimension, LAMMPS is careful to put exactly one particle at
the boundary (on either side of the box), not zero or two. the boundary (on either side of the box), not zero or two.
For the *region* style, a geometric volume is filled with particles on For the *region* style, a geometric volume is filled with particles on
the lattice. This volume what is inside the simulation box and is the lattice. This volume is what is both inside the simulation box
also consistent with the region volume. See the :doc:`region <region>` and also consistent with the region volume. See the :doc:`region
command for details. Note that a region can be specified so that its <region>` command for details. Note that a region can be specified so
"volume" is either inside or outside a geometric boundary. Also note that its "volume" is either inside or outside its geometric boundary.
that if your region is the same size as a periodic simulation box (in Also note that if a region is the same size as a periodic simulation
some dimension), LAMMPS does not implement the same logic described box (in some dimension), LAMMPS does NOT implement the same logic
above as for the *box* style, to insure exactly one particle at described above for the *box* style, to insure exactly one particle at
periodic boundaries. if this is what you desire, you should either periodic boundaries. If this is desired, you should either use the
use the *box* style, or tweak the region size to get precisely the *box* style, or tweak the region size to get precisely the particles
particles you want. you want.
For the *single* style, a single particle is added to the system at For the *single* style, a single particle is added to the system at
the specified coordinates. This can be useful for debugging purposes the specified coordinates. This can be useful for debugging purposes
or to create a tiny system with a handful of particles at specified or to create a tiny system with a handful of particles at specified
positions. positions.
For the *random* style, N particles are added to the system at For the *random* style, *N* particles are added to the system at
randomly generated coordinates, which can be useful for generating an randomly generated coordinates, which can be useful for generating an
amorphous system. The particles are created one by one using the amorphous system. The particles are created one by one using the
specified random number *seed*, resulting in the same set of particles specified random number *seed*, resulting in the same set of particle
coordinates, independent of how many processors are being used in the coordinates, independent of how many processors are being used in the
simulation. If the *region-ID* argument is specified as NULL, then simulation. Unless the *overlap* keyword is specified, particles
the created particles will be anywhere in the simulation box. If a created by the *random* style will typically be highly overlapped.
Various additional criteria can be used to accept or reject a random
particle insertion; see the keyword discussion below. Multiple
attempts per particle are made (see the *maxtry* keyword) until the
insertion is either successful or fails. If this command fails to add
all requested *N* particles, a warning will be output.
If the *region-ID* argument is specified as NULL, then the randomly
created particles will be anywhere in the simulation box. If a
*region-ID* is specified, a geometric volume is filled which is both *region-ID* is specified, a geometric volume is filled which is both
inside the simulation box and is also consistent with the region inside the simulation box and is also consistent with the region
volume. See the :doc:`region <region>` command for details. Note that volume. See the :doc:`region <region>` command for details. Note
a region can be specified so that its "volume" is either inside or that a region can be specified so that its "volume" is either inside
outside a geometric boundary. or outside its geometric boundary.
.. note:: Note that the create_atoms command adds particles to those that
already exist. This means it can be used to add particles to a system
previously read in from a data or restart file. Or the create_atoms
command can be used multiple times, to add multiple sets of particles
to the simulation. For example, grain boundaries can be created, by
interleaving the create_atoms command with :doc:`lattice <lattice>`
commands specifying different orientations.
Particles generated by the *random* style will typically be When this command is used, care should be taken to insure the
highly overlapped which will cause many interatomic potentials to resulting system does not contain particles which are highly
compute large energies and forces. Thus you should either perform an overlapped. Such overlaps will cause many interatomic potentials to
:doc:`energy minimization <minimize>` or run dynamics with :doc:`fix nve/limit <fix_nve_limit>` to equilibrate such a system, before compute huge energies and forces, leading to bad dynamics. There are
running normal dynamics. several strategies to avoid this problem:
Note that this command adds particles to those that already exist. * Use the :doc:`delete_atoms overlap <delete_atoms>` command after
This means it can be used to add particles to a system previously read create_atoms. For example, this strategy can be used to overlay and
in from a data or restart file. Or the create_atoms command can be surround a large protein molecule with a volume of water molecules,
used multiple times, to add multiple sets of particles to the then delete water molecules that overlap with the protein atoms.
simulation. For example, grain boundaries can be created, by
interleaving create_atoms with :doc:`lattice <lattice>` commands
specifying different orientations. By using the create_atoms command
in conjunction with the :doc:`delete_atoms <delete_atoms>` command,
reasonably complex geometries can be created, or a protein can be
solvated with a surrounding box of water molecules.
In all these cases, care should be taken to insure that new atoms do * For the *random* style, use the optional *overlap* keyword to avoid
not overlap existing atoms inappropriately, especially if molecules overlaps when each new particle is created.
are being added. The :doc:`delete_atoms <delete_atoms>` command can be
used to remove overlapping atoms or molecules. * Before running dynamics on an overlapped system, perform an
:doc:`energy minimization <minimize>`. Or run initial dynamics with
:doc:`pair_style soft <pair_soft>` or with :doc:`fix nve/limit
<fix_nve_limit>` to un-overlap the particles, before running normal
dynamics.
.. note:: .. note::
@ -156,12 +174,13 @@ used to remove overlapping atoms or molecules.
that are outside the simulation box; they will just be ignored by that are outside the simulation box; they will just be ignored by
LAMMPS. This is true even if you are using shrink-wrapped box LAMMPS. This is true even if you are using shrink-wrapped box
boundaries, as specified by the :doc:`boundary <boundary>` command. boundaries, as specified by the :doc:`boundary <boundary>` command.
However, you can first use the :doc:`change_box <change_box>` command to However, you can first use the :doc:`change_box <change_box>`
temporarily expand the box, then add atoms via create_atoms, then command to temporarily expand the box, then add atoms via
finally use change_box command again if needed to re-shrink-wrap the create_atoms, then finally use change_box command again if needed
new atoms. See the :doc:`change_box <change_box>` page for an to re-shrink-wrap the new atoms. See the :doc:`change_box
example of how to do this, using the create_atoms *single* style to <change_box>` doc page for an example of how to do this, using the
insert a new atom outside the current simulation box. create_atoms *single* style to insert a new atom outside the
current simulation box.
---------- ----------
@ -180,17 +199,19 @@ Using a lattice to add molecules, e.g. via the *box* or *region* or
points, except that entire molecules are added at each point, i.e. on points, except that entire molecules are added at each point, i.e. on
the point defined by each basis atom in the unit cell as it tiles the the point defined by each basis atom in the unit cell as it tiles the
simulation box or region. This is done by placing the geometric simulation box or region. This is done by placing the geometric
center of the molecule at the lattice point, and giving the molecule a center of the molecule at the lattice point, and (by default) giving
random orientation about the point. The random *seed* specified with the molecule a random orientation about the point. The random *seed*
the *mol* keyword is used for this operation, and the random numbers specified with the *mol* keyword is used for this operation, and the
generated by each processor are different. This means the coordinates random numbers generated by each processor are different. This means
of individual atoms (in the molecules) will be different when running the coordinates of individual atoms (in the molecules) will be
on different numbers of processors, unlike when atoms are being different when running on different numbers of processors, unlike when
created in parallel. atoms are being created in parallel.
Also note that because of the random rotations, it may be important to Note that with random rotations, it may be important to use a lattice
use a lattice with a large enough spacing that adjacent molecules will with a large enough spacing that adjacent molecules will not overlap,
not overlap, regardless of their relative orientations. regardless of their relative orientations. See the description of the
*rotate* keyword below, which overrides the default random orientation
and inserts all molecules at a specified orientation.
.. note:: .. note::
@ -204,7 +225,7 @@ not overlap, regardless of their relative orientations.
---------- ----------
This is the meaning of the other allowed keywords. This is the meaning of the other optional keywords.
The *basis* keyword is only used when atoms (not molecules) are being The *basis* keyword is only used when atoms (not molecules) are being
created. It specifies an atom type that will be assigned to specific created. It specifies an atom type that will be assigned to specific
@ -234,18 +255,24 @@ and no particle is created if its position is outside the box.
The *var* and *set* keywords can be used together to provide a The *var* and *set* keywords can be used together to provide a
criterion for accepting or rejecting the addition of an individual criterion for accepting or rejecting the addition of an individual
atom, based on its coordinates. The *name* specified for the *var* atom, based on its coordinates. They apply to all styles except
keyword is the name of an :doc:`equal-style variable <variable>` which *single*. The *name* specified for the *var* keyword is the name of
should evaluate to a zero or non-zero value based on one or two or an :doc:`equal-style variable <variable>` which should evaluate to a
three variables which will store the x, y, or z coordinates of an atom zero or non-zero value based on one or two or three variables which
(one variable per coordinate). If used, these other variables must be will store the x, y, or z coordinates of an atom (one variable per
:doc:`internal-style variables <variable>` defined in the input script; coordinate). If used, these other variables must be
their initial numeric value can be anything. They must be :doc:`internal-style variables <variable>` defined in the input
script; their initial numeric value can be anything. They must be
internal-style variables, because this command resets their values internal-style variables, because this command resets their values
directly. The *set* keyword is used to identify the names of these directly. The *set* keyword is used to identify the names of these
other variables, one variable for the x-coordinate of a created atom, other variables, one variable for the x-coordinate of a created atom,
one for y, and one for z. one for y, and one for z.
.. figure:: img/sinusoid.jpg
:figwidth: 50%
:align: right
:target: _images/sinusoid.jpg
When an atom is created, its x,y,z coordinates become the values for When an atom is created, its x,y,z coordinates become the values for
any *set* variable that is defined. The *var* variable is then any *set* variable that is defined. The *var* variable is then
evaluated. If the returned value is 0.0, the atom is not created. If evaluated. If the returned value is 0.0, the atom is not created. If
@ -259,6 +286,10 @@ the sinusoid would appear to be "smoother". Also note the use of the
"xlat" and "ylat" :doc:`thermo_style <thermo_style>` keywords which "xlat" and "ylat" :doc:`thermo_style <thermo_style>` keywords which
converts lattice spacings to distance. converts lattice spacings to distance.
.. only:: html
(Click on the image for a larger version)
.. code-block:: LAMMPS .. code-block:: LAMMPS
dimension 2 dimension 2
@ -274,13 +305,7 @@ converts lattice spacings to distance.
create_atoms 1 box var v set x xx set y yy create_atoms 1 box var v set x xx set y yy
write_dump all atom sinusoid.lammpstrj write_dump all atom sinusoid.lammpstrj
.. image:: img/sinusoid.jpg -----
:scale: 50%
:align: center
.. raw:: html
Click on the image for a larger version.
The *rotate* keyword allows specification of the orientation The *rotate* keyword allows specification of the orientation
at which molecules are inserted. The axis of rotation is at which molecules are inserted. The axis of rotation is
@ -291,10 +316,72 @@ the atoms around the rotation axis is consistent with the right-hand
rule: if your right-hand's thumb points along *R*, then your fingers rule: if your right-hand's thumb points along *R*, then your fingers
wrap around the axis in the direction of rotation. wrap around the axis in the direction of rotation.
The *overlap* keyword only applies to the *random* style. It prevents
newly created particles from being created closer than the specified
*Doverlap* distance from any other particle. When the particles being
created are molecules, the radius of the molecule (from its geometric
center) is added to *Doverlap*. If particles have finite size (see
:doc:`atom_style sphere <atom_style>` for example) *Doverlap* should
be specified large enough to include the particle size in the
non-overlapping criterion.
.. note::
Checking for overlaps is a costly O(N(N+M)) operation for inserting
*N* new particles into a system with *M* existing particles. This
is because distances to all *M* existing particles are computed for
each new particle that is added. Thus the intended use of this
keyword is to add relatively small numbers of particles to systems
which remain at a relatively low density even after the new
particles are created. Careful use of the *maxtry* keyword in
combination with *overlap* is recommended. See the discussion
above about systems with overlapped particles for alternate
strategies that allow for overlapped insertions.
The *maxtry* keyword only applies to the *random* style. It limits
the number of attempts to generate valid coordinates for a single new
particle that satisfy all requirements imposed by the *region*, *var*,
and *overlap* keywords. The default is 10 attempts per particle
before the loop over the requested *N* particles advances to the next
particle. Note that if insertion success is unlikely (e.g. inserting
new particles into a dense system using the *overlap* keyword),
setting the *maxtry* keyword to a large value may result in this
command running for a long time.
.. figure:: img/overlap.png
:figwidth: 30%
:align: right
:target: _images/overlap.png
Here is an example for the *random* style using these commands
.. code-block:: LAMMPS
units lj
dimension 2
region box block 0 50 0 50 -0.5 0.5
create_box 1 box
create_atoms 1 random 2000 13487 NULL overlap 1.0 maxtry 50
pair_style lj/cut 2.5
pair_coeff 1 1 1.0 1.0 2.5
to produce a system as shown in the image with 1520 particles (out of
2000 requested) that are moderately dense and which have no overlaps
sufficient to prevent the LJ pair_style from running properly (because
the overlap criterion = 1.0). The create_atoms command ran for 0.3 s
on a single CPU core.
.. only:: html
(Click on the image for a larger version)
-----
The *units* keyword determines the meaning of the distance units used The *units* keyword determines the meaning of the distance units used
to specify the coordinates of the one particle created by the *single* to specify the coordinates of the one particle created by the *single*
style. A *box* value selects standard distance units as defined by style, or the overlap distance *Doverlap* by the *overlap* keyword. A
the :doc:`units <units>` command, e.g. Angstroms for units = real or *box* value selects standard distance units as defined by the
:doc:`units <units>` command, e.g. Angstroms for units = real or
metal. A *lattice* value means the distance units are in lattice metal. A *lattice* value means the distance units are in lattice
spacings. spacings.
@ -315,9 +402,10 @@ assigned to created molecules in a similar fashion.
Aside from their ID, atom type, and xyz position, other properties of Aside from their ID, atom type, and xyz position, other properties of
created atoms are set to default values, depending on which quantities created atoms are set to default values, depending on which quantities
are defined by the chosen :doc:`atom style <atom_style>`. See the :doc:`atom style <atom_style>` command for more details. See the are defined by the chosen :doc:`atom style <atom_style>`. See the
:doc:`set <set>` and :doc:`velocity <velocity>` commands for info on how :doc:`atom style <atom_style>` command for more details. See the
to change these values. :doc:`set <set>` and :doc:`velocity <velocity>` commands for info on
how to change these values.
* charge = 0.0 * charge = 0.0
* dipole moment magnitude = 0.0 * dipole moment magnitude = 0.0
@ -373,4 +461,4 @@ Default
The default for the *basis* keyword is that all created atoms are The default for the *basis* keyword is that all created atoms are
assigned the argument *type* as their atom type (when single atoms are assigned the argument *type* as their atom type (when single atoms are
being created). The other defaults are *remap* = no, *rotate* = being created). The other defaults are *remap* = no, *rotate* =
random, and *units* = lattice. random, *overlap* not checked, *maxtry* = 10, and *units* = lattice.

BIN
doc/src/img/overlap.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 572 KiB

View File

@ -1326,6 +1326,7 @@ hiID
Hijazi Hijazi
Hilger Hilger
Hinestrosa Hinestrosa
hipFFT
histo histo
histogrammed histogrammed
histogramming histogramming
@ -1951,6 +1952,7 @@ maxsize
maxspecial maxspecial
maxSteps maxSteps
maxstrain maxstrain
maxtry
maxwell maxwell
Maxwellian Maxwellian
maxX maxX
@ -2951,6 +2953,7 @@ rnage
rng rng
rNEMD rNEMD
ro ro
rocFFT
Rochus Rochus
Rockett Rockett
rocksalt rocksalt

1
src/.gitignore vendored
View File

@ -627,6 +627,7 @@
/ewald.h /ewald.h
/ewald_cg.cpp /ewald_cg.cpp
/ewald_cg.h /ewald_cg.h
/ewald_const.h
/ewald_dipole.cpp /ewald_dipole.cpp
/ewald_dipole.h /ewald_dipole.h
/ewald_dipole_spin.cpp /ewald_dipole_spin.cpp

View File

@ -46,13 +46,17 @@ FFT3dKokkos<DeviceType>::FFT3dKokkos(LAMMPS *lmp, MPI_Comm comm, int nfast, int
#if defined(FFT_MKL) #if defined(FFT_MKL)
if (ngpus > 0 && execution_space == Device) if (ngpus > 0 && execution_space == Device)
lmp->error->all(FLERR,"Cannot use the MKL library with Kokkos CUDA on GPUs"); lmp->error->all(FLERR,"Cannot use the MKL library with Kokkos on GPUs");
#elif defined(FFT_FFTW3) #elif defined(FFT_FFTW3)
if (ngpus > 0 && execution_space == Device) if (ngpus > 0 && execution_space == Device)
lmp->error->all(FLERR,"Cannot use the FFTW library with Kokkos CUDA on GPUs"); lmp->error->all(FLERR,"Cannot use the FFTW library with Kokkos on GPUs");
#elif defined(FFT_CUFFT) #elif defined(FFT_CUFFT)
if (ngpus > 0 && execution_space == Host) if (ngpus > 0 && execution_space == Host)
lmp->error->all(FLERR,"Cannot use the cuFFT library with Kokkos CUDA on the host CPUs"); lmp->error->all(FLERR,"Cannot use the cuFFT library with Kokkos on the host CPUs");
#elif defined(FFT_HIPFFT)
if (ngpus > 0 && execution_space == Host)
lmp->error->all(FLERR,"Cannot use the hipFFT library with Kokkos on the host CPUs");
#elif defined(FFT_KISSFFT) #elif defined(FFT_KISSFFT)
// The compiler can't statically determine the stack size needed for // The compiler can't statically determine the stack size needed for
// recursive function calls in KISS FFT and the default per-thread // recursive function calls in KISS FFT and the default per-thread
@ -145,7 +149,7 @@ public:
KOKKOS_INLINE_FUNCTION KOKKOS_INLINE_FUNCTION
void operator() (const int &i) const { void operator() (const int &i) const {
#if defined(FFT_FFTW3) || defined(FFT_CUFFT) #if defined(FFT_FFTW3) || defined(FFT_CUFFT) || defined(FFT_HIPFFT)
FFT_SCALAR* out_ptr = (FFT_SCALAR *)(d_out.data()+i); FFT_SCALAR* out_ptr = (FFT_SCALAR *)(d_out.data()+i);
*(out_ptr++) *= norm; *(out_ptr++) *= norm;
*(out_ptr++) *= norm; *(out_ptr++) *= norm;
@ -227,6 +231,8 @@ void FFT3dKokkos<DeviceType>::fft_3d_kokkos(typename FFT_AT::t_FFT_DATA_1d d_in,
FFTW_API(execute_dft)(plan->plan_fast_backward,(FFT_DATA*)d_data.data(),(FFT_DATA*)d_data.data()); FFTW_API(execute_dft)(plan->plan_fast_backward,(FFT_DATA*)d_data.data(),(FFT_DATA*)d_data.data());
#elif defined(FFT_CUFFT) #elif defined(FFT_CUFFT)
cufftExec(plan->plan_fast,d_data.data(),d_data.data(),-flag); cufftExec(plan->plan_fast,d_data.data(),d_data.data(),-flag);
#elif defined(FFT_HIPFFT)
hipfftExec(plan->plan_fast,d_data.data(),d_data.data(),-flag);
#else #else
typename FFT_AT::t_FFT_DATA_1d d_tmp = typename FFT_AT::t_FFT_DATA_1d d_tmp =
typename FFT_AT::t_FFT_DATA_1d(Kokkos::view_alloc("fft_3d:tmp",Kokkos::WithoutInitializing),d_data.extent(0)); typename FFT_AT::t_FFT_DATA_1d(Kokkos::view_alloc("fft_3d:tmp",Kokkos::WithoutInitializing),d_data.extent(0));
@ -271,6 +277,8 @@ void FFT3dKokkos<DeviceType>::fft_3d_kokkos(typename FFT_AT::t_FFT_DATA_1d d_in,
FFTW_API(execute_dft)(plan->plan_mid_backward,(FFT_DATA*)d_data.data(),(FFT_DATA*)d_data.data()); FFTW_API(execute_dft)(plan->plan_mid_backward,(FFT_DATA*)d_data.data(),(FFT_DATA*)d_data.data());
#elif defined(FFT_CUFFT) #elif defined(FFT_CUFFT)
cufftExec(plan->plan_mid,d_data.data(),d_data.data(),-flag); cufftExec(plan->plan_mid,d_data.data(),d_data.data(),-flag);
#elif defined(FFT_HIPFFT)
hipfftExec(plan->plan_mid,d_data.data(),d_data.data(),-flag);
#else #else
d_tmp = typename FFT_AT::t_FFT_DATA_1d(Kokkos::view_alloc("fft_3d:tmp",Kokkos::WithoutInitializing),d_data.extent(0)); d_tmp = typename FFT_AT::t_FFT_DATA_1d(Kokkos::view_alloc("fft_3d:tmp",Kokkos::WithoutInitializing),d_data.extent(0));
if (flag == 1) if (flag == 1)
@ -313,6 +321,8 @@ void FFT3dKokkos<DeviceType>::fft_3d_kokkos(typename FFT_AT::t_FFT_DATA_1d d_in,
FFTW_API(execute_dft)(plan->plan_slow_backward,(FFT_DATA*)d_data.data(),(FFT_DATA*)d_data.data()); FFTW_API(execute_dft)(plan->plan_slow_backward,(FFT_DATA*)d_data.data(),(FFT_DATA*)d_data.data());
#elif defined(FFT_CUFFT) #elif defined(FFT_CUFFT)
cufftExec(plan->plan_slow,d_data.data(),d_data.data(),-flag); cufftExec(plan->plan_slow,d_data.data(),d_data.data(),-flag);
#elif defined(FFT_HIPFFT)
hipfftExec(plan->plan_slow,d_data.data(),d_data.data(),-flag);
#else #else
d_tmp = typename FFT_AT::t_FFT_DATA_1d(Kokkos::view_alloc("fft_3d:tmp",Kokkos::WithoutInitializing),d_data.extent(0)); d_tmp = typename FFT_AT::t_FFT_DATA_1d(Kokkos::view_alloc("fft_3d:tmp",Kokkos::WithoutInitializing),d_data.extent(0));
if (flag == 1) if (flag == 1)
@ -699,6 +709,23 @@ struct fft_plan_3d_kokkos<DeviceType>* FFT3dKokkos<DeviceType>::fft_3d_create_pl
&nslow,1,plan->length3, &nslow,1,plan->length3,
CUFFT_TYPE,plan->total3/plan->length3); CUFFT_TYPE,plan->total3/plan->length3);
#elif defined(FFT_HIPFFT)
hipfftPlanMany(&(plan->plan_fast), 1, &nfast,
&nfast,1,plan->length1,
&nfast,1,plan->length1,
HIPFFT_TYPE,plan->total1/plan->length1);
hipfftPlanMany(&(plan->plan_mid), 1, &nmid,
&nmid,1,plan->length2,
&nmid,1,plan->length2,
HIPFFT_TYPE,plan->total2/plan->length2);
hipfftPlanMany(&(plan->plan_slow), 1, &nslow,
&nslow,1,plan->length3,
&nslow,1,plan->length3,
HIPFFT_TYPE,plan->total3/plan->length3);
#else /* FFT_KISS */ #else /* FFT_KISS */
kissfftKK = new KissFFTKokkos<DeviceType>(); kissfftKK = new KissFFTKokkos<DeviceType>();
@ -863,6 +890,10 @@ void FFT3dKokkos<DeviceType>::fft_3d_1d_only_kokkos(typename FFT_AT::t_FFT_DATA_
cufftExec(plan->plan_fast,d_data.data(),d_data.data(),-flag); cufftExec(plan->plan_fast,d_data.data(),d_data.data(),-flag);
cufftExec(plan->plan_mid,d_data.data(),d_data.data(),-flag); cufftExec(plan->plan_mid,d_data.data(),d_data.data(),-flag);
cufftExec(plan->plan_slow,d_data.data(),d_data.data(),-flag); cufftExec(plan->plan_slow,d_data.data(),d_data.data(),-flag);
#elif defined(FFT_HIPFFT)
hipfftExec(plan->plan_fast,d_data.data(),d_data.data(),-flag);
hipfftExec(plan->plan_mid,d_data.data(),d_data.data(),-flag);
hipfftExec(plan->plan_slow,d_data.data(),d_data.data(),-flag);
#else #else
kiss_fft_functor<DeviceType> f; kiss_fft_functor<DeviceType> f;
typename FFT_AT::t_FFT_DATA_1d d_tmp = typename FFT_AT::t_FFT_DATA_1d d_tmp =

View File

@ -60,6 +60,10 @@ struct fft_plan_3d_kokkos {
cufftHandle plan_fast; cufftHandle plan_fast;
cufftHandle plan_mid; cufftHandle plan_mid;
cufftHandle plan_slow; cufftHandle plan_slow;
#elif defined(FFT_HIPFFT)
hipfftHandle plan_fast;
hipfftHandle plan_mid;
hipfftHandle plan_slow;
#else #else
kiss_fft_state_kokkos<DeviceType> cfg_fast_forward; kiss_fft_state_kokkos<DeviceType> cfg_fast_forward;
kiss_fft_state_kokkos<DeviceType> cfg_fast_backward; kiss_fft_state_kokkos<DeviceType> cfg_fast_backward;

View File

@ -36,8 +36,8 @@
#endif #endif
// with KOKKOS in CUDA mode we can only have // with KOKKOS in CUDA or HIP mode we can only have
// CUFFT or KISSFFT, thus undefine all other // CUFFT/HIPFFT or KISSFFT, thus undefine all other
// FFTs here, since they may be valid in fft3d.cpp // FFTs here, since they may be valid in fft3d.cpp
#ifdef KOKKOS_ENABLE_CUDA #ifdef KOKKOS_ENABLE_CUDA
@ -53,10 +53,26 @@
# if !defined(FFT_CUFFT) && !defined(FFT_KISSFFT) # if !defined(FFT_CUFFT) && !defined(FFT_KISSFFT)
# define FFT_KISSFFT # define FFT_KISSFFT
# endif # endif
#elif defined(KOKKOS_ENABLE_HIP)
# if defined(FFT_FFTW)
# undef FFT_FFTW
# endif
# if defined(FFT_FFTW3)
# undef FFT_FFTW3
# endif
# if defined(FFT_MKL)
# undef FFT_MKL
# endif
# if !defined(FFT_HIPFFT) && !defined(FFT_KISSFFT)
# define FFT_KISSFFT
# endif
#else #else
# if defined(FFT_CUFFT) # if defined(FFT_CUFFT)
# error "Must enable CUDA with KOKKOS to use -DFFT_CUFFT" # error "Must enable CUDA with KOKKOS to use -DFFT_CUFFT"
# endif # endif
# if defined(FFT_HIPFFT)
# error "Must enable HIP with KOKKOS to use -DFFT_HIPFFT"
# endif
// if user set FFTW, it means FFTW3 // if user set FFTW, it means FFTW3
# ifdef FFT_FFTW # ifdef FFT_FFTW
# define FFT_FFTW3 # define FFT_FFTW3
@ -97,6 +113,17 @@
#define CUFFT_TYPE CUFFT_Z2Z #define CUFFT_TYPE CUFFT_Z2Z
typedef cufftDoubleComplex FFT_DATA; typedef cufftDoubleComplex FFT_DATA;
#endif #endif
#elif defined(FFT_HIPFFT)
#include "hipfft.h"
#if defined(FFT_SINGLE)
#define hipfftExec hipfftExecC2C
#define HIPFFT_TYPE HIPFFT_C2C
typedef hipfftComplex FFT_DATA;
#else
#define hipfftExec hipfftExecZ2Z
#define HIPFFT_TYPE HIPFFT_Z2Z
typedef hipfftDoubleComplex FFT_DATA;
#endif
#else #else
#if defined(FFT_SINGLE) #if defined(FFT_SINGLE)
#define kiss_fft_scalar float #define kiss_fft_scalar float

View File

@ -1,4 +1,4 @@
# crusher_kokkos = KOKKOS/HIP, AMD MI250X GPU and AMD EPYC 7A53 "Optimized 3rd Gen EPYC" CPU, Cray MPICH, hipcc compiler # crusher_kokkos = KOKKOS/HIP, AMD MI250X GPU and AMD EPYC 7A53 "Optimized 3rd Gen EPYC" CPU, Cray MPICH, hipcc compiler, hipFFT
SHELL = /bin/sh SHELL = /bin/sh
@ -54,9 +54,12 @@ MPI_LIB = -L${MPICH_DIR}/lib -lmpi -L${CRAY_MPICH_ROOTDIR}/gtl/lib -lmpi_gtl_hsa
# PATH = path for FFT library # PATH = path for FFT library
# LIB = name of FFT library # LIB = name of FFT library
FFT_INC = MY_HIP_EXE = $(shell which hipcc)
MY_HIP_PATH = $(dir ${MY_HIP_EXE})
FFT_INC = -DFFT_HIPFFT
FFT_PATH = FFT_PATH =
FFT_LIB = FFT_LIB = -L${MY_HIP_PATH}../lib -lhipfft
# JPEG and/or PNG library # JPEG and/or PNG library
# see discussion in Section 3.5.4 of manual # see discussion in Section 3.5.4 of manual

View File

@ -1,4 +1,4 @@
# spock_kokkos = KOKKOS/HIP, AMD MI100 GPU and AMD EPYC 7662 "Rome" CPU, Cray MPICH, hipcc compiler # spock_kokkos = KOKKOS/HIP, AMD MI100 GPU and AMD EPYC 7662 "Rome" CPU, Cray MPICH, hipcc compiler, hipFFT
SHELL = /bin/sh SHELL = /bin/sh
@ -54,9 +54,12 @@ MPI_LIB = -L${MPICH_DIR}/lib -lmpi -L${CRAY_MPICH_ROOTDIR}/gtl/lib -lmpi_gtl_hsa
# PATH = path for FFT library # PATH = path for FFT library
# LIB = name of FFT library # LIB = name of FFT library
FFT_INC = MY_HIP_EXE = $(shell which hipcc)
MY_HIP_PATH = $(dir ${MY_HIP_EXE})
FFT_INC = -DFFT_HIPFFT
FFT_PATH = FFT_PATH =
FFT_LIB = FFT_LIB = -L${MY_HIP_PATH}../lib -lhipfft
# JPEG and/or PNG library # JPEG and/or PNG library
# see discussion in Section 3.5.4 of manual # see discussion in Section 3.5.4 of manual

View File

@ -13,6 +13,7 @@
/* ---------------------------------------------------------------------- /* ----------------------------------------------------------------------
Contributing author (ratio and subset) : Jake Gissinger (U Colorado) Contributing author (ratio and subset) : Jake Gissinger (U Colorado)
Contributing author (maxtry & overlap) : Eugen Rozic (IRB, Zagreb)
------------------------------------------------------------------------- */ ------------------------------------------------------------------------- */
#include "create_atoms.h" #include "create_atoms.h"
@ -44,6 +45,7 @@ using namespace MathConst;
#define BIG 1.0e30 #define BIG 1.0e30
#define EPSILON 1.0e-6 #define EPSILON 1.0e-6
#define LB_FACTOR 1.1 #define LB_FACTOR 1.1
#define DEFAULT_MAXTRY 1000
enum { BOX, REGION, SINGLE, RANDOM }; enum { BOX, REGION, SINGLE, RANDOM };
enum { ATOM, MOLECULE }; enum { ATOM, MOLECULE };
@ -95,7 +97,6 @@ void CreateAtoms::command(int narg, char **arg)
region->init(); region->init();
region->prematch(); region->prematch();
iarg = 3; iarg = 3;
;
} else if (strcmp(arg[1], "single") == 0) { } else if (strcmp(arg[1], "single") == 0) {
style = SINGLE; style = SINGLE;
if (narg < 5) error->all(FLERR, "Illegal create_atoms command"); if (narg < 5) error->all(FLERR, "Illegal create_atoms command");
@ -107,7 +108,9 @@ void CreateAtoms::command(int narg, char **arg)
style = RANDOM; style = RANDOM;
if (narg < 5) error->all(FLERR, "Illegal create_atoms command"); if (narg < 5) error->all(FLERR, "Illegal create_atoms command");
nrandom = utils::inumeric(FLERR, arg[2], false, lmp); nrandom = utils::inumeric(FLERR, arg[2], false, lmp);
if (nrandom < 0) error->all(FLERR, "Illegal create_atoms command");
seed = utils::inumeric(FLERR, arg[3], false, lmp); seed = utils::inumeric(FLERR, arg[3], false, lmp);
if (seed <= 0) error->all(FLERR, "Illegal create_atoms command");
if (strcmp(arg[4], "NULL") == 0) if (strcmp(arg[4], "NULL") == 0)
region = nullptr; region = nullptr;
else { else {
@ -126,11 +129,15 @@ void CreateAtoms::command(int narg, char **arg)
remapflag = 0; remapflag = 0;
mode = ATOM; mode = ATOM;
int molseed; int molseed;
ranmol = nullptr;
varflag = 0; varflag = 0;
vstr = xstr = ystr = zstr = nullptr; vstr = xstr = ystr = zstr = nullptr;
quatone[0] = quatone[1] = quatone[2] = 0.0; quat_user = 0;
quatone[0] = quatone[1] = quatone[2] = quatone[3] = 0.0;
subsetflag = NONE; subsetflag = NONE;
int subsetseed; int subsetseed;
overlapflag = 0;
maxtry = DEFAULT_MAXTRY;
nbasis = domain->lattice->nbasis; nbasis = domain->lattice->nbasis;
basistype = new int[nbasis]; basistype = new int[nbasis];
@ -172,6 +179,8 @@ void CreateAtoms::command(int narg, char **arg)
error->all(FLERR, "Illegal create_atoms command"); error->all(FLERR, "Illegal create_atoms command");
iarg += 2; iarg += 2;
} else if (strcmp(arg[iarg], "var") == 0) { } else if (strcmp(arg[iarg], "var") == 0) {
if (style == SINGLE) error->all(FLERR, "Illegal create_atoms command: "
"can't combine 'var' keyword with 'single' style!");
if (iarg + 2 > narg) error->all(FLERR, "Illegal create_atoms command"); if (iarg + 2 > narg) error->all(FLERR, "Illegal create_atoms command");
delete[] vstr; delete[] vstr;
vstr = utils::strdup(arg[iarg + 1]); vstr = utils::strdup(arg[iarg + 1]);
@ -193,10 +202,10 @@ void CreateAtoms::command(int narg, char **arg)
iarg += 3; iarg += 3;
} else if (strcmp(arg[iarg], "rotate") == 0) { } else if (strcmp(arg[iarg], "rotate") == 0) {
if (iarg + 5 > narg) error->all(FLERR, "Illegal create_atoms command"); if (iarg + 5 > narg) error->all(FLERR, "Illegal create_atoms command");
quat_user = 1;
double thetaone; double thetaone;
double axisone[3]; double axisone[3];
thetaone = utils::numeric(FLERR, arg[iarg + 1], false, lmp) / 180.0 * MY_PI; thetaone = utils::numeric(FLERR, arg[iarg + 1], false, lmp) / 180.0 * MY_PI;
;
axisone[0] = utils::numeric(FLERR, arg[iarg + 2], false, lmp); axisone[0] = utils::numeric(FLERR, arg[iarg + 2], false, lmp);
axisone[1] = utils::numeric(FLERR, arg[iarg + 3], false, lmp); axisone[1] = utils::numeric(FLERR, arg[iarg + 3], false, lmp);
axisone[2] = utils::numeric(FLERR, arg[iarg + 4], false, lmp); axisone[2] = utils::numeric(FLERR, arg[iarg + 4], false, lmp);
@ -222,38 +231,46 @@ void CreateAtoms::command(int narg, char **arg)
subsetseed = utils::inumeric(FLERR, arg[iarg + 2], false, lmp); subsetseed = utils::inumeric(FLERR, arg[iarg + 2], false, lmp);
if (nsubset <= 0 || subsetseed <= 0) error->all(FLERR, "Illegal create_atoms command"); if (nsubset <= 0 || subsetseed <= 0) error->all(FLERR, "Illegal create_atoms command");
iarg += 3; iarg += 3;
} else if (strcmp(arg[iarg], "overlap") == 0) {
if (style != RANDOM)
error->all(FLERR, "Create_atoms overlap can only be used with random style");
if (iarg + 2 > narg) error->all(FLERR, "Illegal create_atoms command");
overlap = utils::numeric(FLERR, arg[iarg + 1], false, lmp);
if (overlap <= 0) error->all(FLERR, "Illegal create_atoms command");
overlapflag = 1;
iarg += 2;
} else if (strcmp(arg[iarg], "maxtry") == 0) {
if (style != RANDOM)
error->all(FLERR, "Create_atoms maxtry can only be used with random style");
if (iarg + 2 > narg) error->all(FLERR, "Illegal create_atoms command");
maxtry = utils::inumeric(FLERR, arg[iarg + 1], false, lmp);
if (maxtry <= 0) error->all(FLERR,"Illegal create_atoms command");
iarg += 2;
} else } else
error->all(FLERR, "Illegal create_atoms command"); error->all(FLERR, "Illegal create_atoms command");
} }
// error checks // error checks and further setup for mode = MOLECULE
if (mode == ATOM && (ntype <= 0 || ntype > atom->ntypes)) if (mode == ATOM) {
if (ntype <= 0 || ntype > atom->ntypes)
error->all(FLERR, "Invalid atom type in create_atoms command"); error->all(FLERR, "Invalid atom type in create_atoms command");
} else if (mode == MOLECULE) {
if (style == RANDOM) { if (onemol->xflag == 0)
if (nrandom < 0) error->all(FLERR, "Illegal create_atoms command"); error->all(FLERR, "Create_atoms molecule must have coordinates");
if (seed <= 0) error->all(FLERR, "Illegal create_atoms command"); if (onemol->typeflag == 0)
} error->all(FLERR, "Create_atoms molecule must have atom types");
// error check and further setup for mode = MOLECULE
ranmol = nullptr;
if (mode == MOLECULE) {
if (onemol->xflag == 0) error->all(FLERR, "Create_atoms molecule must have coordinates");
if (onemol->typeflag == 0) error->all(FLERR, "Create_atoms molecule must have atom types");
if (ntype + onemol->ntypes <= 0 || ntype + onemol->ntypes > atom->ntypes) if (ntype + onemol->ntypes <= 0 || ntype + onemol->ntypes > atom->ntypes)
error->all(FLERR, "Invalid atom type in create_atoms mol command"); error->all(FLERR, "Invalid atom type in create_atoms mol command");
if (onemol->tag_require && !atom->tag_enable) if (onemol->tag_require && !atom->tag_enable)
error->all(FLERR, "Create_atoms molecule has atom IDs, but system does not"); error->all(FLERR, "Create_atoms molecule has atom IDs, but system does not");
onemol->check_attributes(0); onemol->check_attributes(0);
// create_atoms uses geoemetric center of molecule for insertion // use geometric center of molecule for insertion
onemol->compute_center();
// molecule random number generator, different for each proc // molecule random number generator, different for each proc
onemol->compute_center();
ranmol = new RanMars(lmp, molseed + comm->me); ranmol = new RanMars(lmp, molseed + comm->me);
} }
@ -305,6 +322,7 @@ void CreateAtoms::command(int narg, char **arg)
xone[0] *= domain->lattice->xlattice; xone[0] *= domain->lattice->xlattice;
xone[1] *= domain->lattice->ylattice; xone[1] *= domain->lattice->ylattice;
xone[2] *= domain->lattice->zlattice; xone[2] *= domain->lattice->zlattice;
overlap *= domain->lattice->xlattice;
} }
// set bounds for my proc in sublo[3] & subhi[3] // set bounds for my proc in sublo[3] & subhi[3]
@ -376,7 +394,7 @@ void CreateAtoms::command(int narg, char **arg)
} }
} }
// Record wall time for atom creation // record wall time for atom creation
MPI_Barrier(world); MPI_Barrier(world);
double time1 = platform::walltime(); double time1 = platform::walltime();
@ -630,12 +648,11 @@ void CreateAtoms::add_single()
if (coord[0] >= sublo[0] && coord[0] < subhi[0] && coord[1] >= sublo[1] && coord[1] < subhi[1] && if (coord[0] >= sublo[0] && coord[0] < subhi[0] && coord[1] >= sublo[1] && coord[1] < subhi[1] &&
coord[2] >= sublo[2] && coord[2] < subhi[2]) { coord[2] >= sublo[2] && coord[2] < subhi[2]) {
if (mode == ATOM) if (mode == ATOM) {
atom->avec->create_atom(ntype, xone); atom->avec->create_atom(ntype, xone);
else if (quatone[0] == 0.0 && quatone[1] == 0.0 && quatone[2] == 0.0) } else {
add_molecule(xone); add_molecule(xone);
else }
add_molecule(xone, quatone);
} }
} }
@ -646,9 +663,16 @@ void CreateAtoms::add_single()
void CreateAtoms::add_random() void CreateAtoms::add_random()
{ {
double xlo, ylo, zlo, xhi, yhi, zhi, zmid; double xlo, ylo, zlo, xhi, yhi, zhi, zmid;
double delx, dely, delz, distsq, odistsq;
double lamda[3], *coord; double lamda[3], *coord;
double *boxlo, *boxhi; double *boxlo, *boxhi;
if (overlapflag) {
double odist = overlap;
if (mode == MOLECULE) odist += onemol->molradius;
odistsq = odist*odist;
}
// random number generator, same for all procs // random number generator, same for all procs
// warm up the generator 30x to avoid correlations in first-particle // warm up the generator 30x to avoid correlations in first-particle
// positions if runs are repeated with consecutive seeds // positions if runs are repeated with consecutive seeds
@ -689,48 +713,100 @@ void CreateAtoms::add_random()
zhi = MIN(zhi, region->extent_zhi); zhi = MIN(zhi, region->extent_zhi);
} }
// generate random positions for each new atom/molecule within bounding box
// iterate until atom is within region, variable, and triclinic simulation box
// if final atom position is in my subbox, create it
if (xlo > xhi || ylo > yhi || zlo > zhi) if (xlo > xhi || ylo > yhi || zlo > zhi)
error->all(FLERR, "No overlap of box and region for create_atoms"); error->all(FLERR, "No overlap of box and region for create_atoms");
int valid; // insert Nrandom new atom/molecule into simulation box
int ntry,success;
int ninsert = 0;
for (int i = 0; i < nrandom; i++) { for (int i = 0; i < nrandom; i++) {
while (true) {
// attempt to insert an atom/molecule up to maxtry times
// criteria for insertion: region, variable, triclinic box, overlap
success = 0;
ntry = 0;
while (ntry < maxtry) {
ntry++;
xone[0] = xlo + random->uniform() * (xhi - xlo); xone[0] = xlo + random->uniform() * (xhi - xlo);
xone[1] = ylo + random->uniform() * (yhi - ylo); xone[1] = ylo + random->uniform() * (yhi - ylo);
xone[2] = zlo + random->uniform() * (zhi - zlo); xone[2] = zlo + random->uniform() * (zhi - zlo);
if (domain->dimension == 2) xone[2] = zmid; if (domain->dimension == 2) xone[2] = zmid;
valid = 1; if (region && (region->match(xone[0], xone[1], xone[2]) == 0)) continue;
if (region && (region->match(xone[0], xone[1], xone[2]) == 0)) valid = 0; if (varflag && vartest(xone) == 0) continue;
if (varflag && vartest(xone) == 0) valid = 0;
if (triclinic) { if (triclinic) {
domain->x2lamda(xone, lamda); domain->x2lamda(xone, lamda);
coord = lamda; coord = lamda;
if (coord[0] < boxlo[0] || coord[0] >= boxhi[0] || coord[1] < boxlo[1] || if (coord[0] < boxlo[0] || coord[0] >= boxhi[0] || coord[1] < boxlo[1] ||
coord[1] >= boxhi[1] || coord[2] < boxlo[2] || coord[2] >= boxhi[2]) coord[1] >= boxhi[1] || coord[2] < boxlo[2] || coord[2] >= boxhi[2])
valid = 0; continue;
} else } else {
coord = xone; coord = xone;
if (valid) break;
} }
// check for overlap of new atom/mol with all other atoms
// including prior insertions
// minimum_image() needed to account for distances across PBC
// new molecule only checks its center pt against others
// odistsq is expanded for mode=MOLECULE to account for molecule size
if (overlapflag) {
double **x = atom->x;
int nlocal = atom->nlocal;
int reject = 0;
for (int i = 0; i < nlocal; i++) {
delx = xone[0] - x[i][0];
dely = xone[1] - x[i][1];
delz = xone[2] - x[i][2];
domain->minimum_image(delx, dely, delz);
distsq = delx*delx + dely*dely + delz*delz;
if (distsq < odistsq) {
reject = 1;
break;
}
}
int reject_any;
MPI_Allreduce(&reject, &reject_any, 1, MPI_INT, MPI_MAX, world);
if (reject_any) continue;
}
// all tests passed
success = 1;
break;
}
// insertion failed, advance to next atom/molecule
if (!success) continue;
// insertion criteria were met
// if final atom position is in my subbox, create it
// if triclinic, coord is now in lamda units // if triclinic, coord is now in lamda units
ninsert++;
if (coord[0] >= sublo[0] && coord[0] < subhi[0] && coord[1] >= sublo[1] && if (coord[0] >= sublo[0] && coord[0] < subhi[0] && coord[1] >= sublo[1] &&
coord[1] < subhi[1] && coord[2] >= sublo[2] && coord[2] < subhi[2]) { coord[1] < subhi[1] && coord[2] >= sublo[2] && coord[2] < subhi[2]) {
if (mode == ATOM) if (mode == ATOM) {
atom->avec->create_atom(ntype, xone); atom->avec->create_atom(ntype, xone);
else if (quatone[0] == 0 && quatone[1] == 0 && quatone[2] == 0) } else {
add_molecule(xone); add_molecule(xone);
else
add_molecule(xone, quatone);
} }
} }
}
// warn if did not successfully insert Nrandom atoms/molecules
if (ninsert < nrandom && comm->me == 0)
error->warning(FLERR, "Only inserted {} particles out of {}", ninsert, nrandom);
// clean-up // clean-up
@ -785,6 +861,7 @@ void CreateAtoms::add_lattice()
xmax = ymax = zmax = -BIG; xmax = ymax = zmax = -BIG;
// convert to lattice coordinates and set bounding box // convert to lattice coordinates and set bounding box
domain->lattice->bbox(1, bboxlo[0], bboxlo[1], bboxlo[2], xmin, ymin, zmin, xmax, ymax, zmax); domain->lattice->bbox(1, bboxlo[0], bboxlo[1], bboxlo[2], xmin, ymin, zmin, xmax, ymax, zmax);
domain->lattice->bbox(1, bboxhi[0], bboxlo[1], bboxlo[2], xmin, ymin, zmin, xmax, ymax, zmax); domain->lattice->bbox(1, bboxhi[0], bboxlo[1], bboxlo[2], xmin, ymin, zmin, xmax, ymax, zmax);
domain->lattice->bbox(1, bboxlo[0], bboxhi[1], bboxlo[2], xmin, ymin, zmin, xmax, ymax, zmax); domain->lattice->bbox(1, bboxlo[0], bboxhi[1], bboxlo[2], xmin, ymin, zmin, xmax, ymax, zmax);
@ -905,7 +982,8 @@ void CreateAtoms::loop_lattice(int action)
// if variable test specified, eval variable // if variable test specified, eval variable
if (varflag && vartest(x) == 0) continue; if (varflag && vartest(x) == 0)
continue;
// test if atom/molecule position is in my subbox // test if atom/molecule position is in my subbox
@ -924,21 +1002,19 @@ void CreateAtoms::loop_lattice(int action)
// add = add an atom or entire molecule to my list of atoms // add = add an atom or entire molecule to my list of atoms
if (action == INSERT) { if (action == INSERT) {
if (mode == ATOM) if (mode == ATOM) {
atom->avec->create_atom(basistype[m], x); atom->avec->create_atom(basistype[m], x);
else if (quatone[0] == 0 && quatone[1] == 0 && quatone[2] == 0) } else {
add_molecule(x); add_molecule(x);
else }
add_molecule(x, quatone);
} else if (action == COUNT) { } else if (action == COUNT) {
if (nlatt == MAXSMALLINT) nlatt_overflow = 1; if (nlatt == MAXSMALLINT) nlatt_overflow = 1;
} else if (action == INSERT_SELECTED && flag[nlatt]) { } else if (action == INSERT_SELECTED && flag[nlatt]) {
if (mode == ATOM) if (mode == ATOM) {
atom->avec->create_atom(basistype[m], x); atom->avec->create_atom(basistype[m], x);
else if (quatone[0] == 0 && quatone[1] == 0 && quatone[2] == 0) } else {
add_molecule(x); add_molecule(x);
else }
add_molecule(x, quatone);
} }
nlatt++; nlatt++;
@ -949,43 +1025,42 @@ void CreateAtoms::loop_lattice(int action)
} }
/* ---------------------------------------------------------------------- /* ----------------------------------------------------------------------
add a randomly rotated molecule with its center at center add a molecule with its center at center
if quat_user set, perform requested rotation
------------------------------------------------------------------------- */ ------------------------------------------------------------------------- */
void CreateAtoms::add_molecule(double *center, double *quat_user) void CreateAtoms::add_molecule(double *center)
{ {
int n; double r[3], rotmat[3][3];
double r[3], rotmat[3][3], quat[4], xnew[3];
if (quat_user) { // use quatone as-is if user set it
quat[0] = quat_user[0]; // else generate random quaternion in quatone
quat[1] = quat_user[1];
quat[2] = quat_user[2]; if (!quat_user) {
quat[3] = quat_user[3];
} else {
if (domain->dimension == 3) { if (domain->dimension == 3) {
r[0] = ranmol->uniform() - 0.5; r[0] = ranmol->uniform() - 0.5;
r[1] = ranmol->uniform() - 0.5; r[1] = ranmol->uniform() - 0.5;
r[2] = ranmol->uniform() - 0.5; r[2] = ranmol->uniform() - 0.5;
MathExtra::norm3(r);
} else { } else {
r[0] = r[1] = 0.0; r[0] = r[1] = 0.0;
r[2] = 1.0; r[2] = 1.0;
} }
MathExtra::norm3(r);
double theta = ranmol->uniform() * MY_2PI; double theta = ranmol->uniform() * MY_2PI;
MathExtra::axisangle_to_quat(r, theta, quat); MathExtra::axisangle_to_quat(r, theta, quatone);
} }
MathExtra::quat_to_mat(quat, rotmat); MathExtra::quat_to_mat(quatone, rotmat);
onemol->quat_external = quat;
// create atoms in molecule with atom ID = 0 and mol ID = 0 // create atoms in molecule with atom ID = 0 and mol ID = 0
// reset in caller after all molecules created by all procs // IDs are reset in caller after all molecules created by all procs
// pass add_molecule_atom an offset of 0 since don't know // pass add_molecule_atom an offset of 0 since don't know
// max tag of atoms in previous molecules at this point // max tag of atoms in previous molecules at this point
// onemol->quat_external is used by atom->add_moleclue_atom()
int natoms = onemol->natoms; onemol->quat_external = quatone;
int n, natoms = onemol->natoms;
double xnew[3];
for (int m = 0; m < natoms; m++) { for (int m = 0; m < natoms; m++) {
MathExtra::matvec(rotmat, onemol->dx[m], xnew); MathExtra::matvec(rotmat, onemol->dx[m], xnew);
MathExtra::add3(xnew, center, xnew); MathExtra::add3(xnew, center, xnew);

View File

@ -32,6 +32,10 @@ class CreateAtoms : public Command {
private: private:
int ntype, style, mode, nbasis, nrandom, seed; int ntype, style, mode, nbasis, nrandom, seed;
int remapflag; int remapflag;
int maxtry;
int quat_user;
int overlapflag;
double overlap;
int subsetflag; int subsetflag;
bigint nsubset; bigint nsubset;
double subsetfrac; double subsetfrac;
@ -62,7 +66,7 @@ class CreateAtoms : public Command {
void add_random(); void add_random();
void add_lattice(); void add_lattice();
void loop_lattice(int); void loop_lattice(int);
void add_molecule(double *, double * = nullptr); void add_molecule(double *);
int vartest(double *); // evaluate a variable with new atom position int vartest(double *); // evaluate a variable with new atom position
}; };

View File

@ -32,6 +32,8 @@
#define LMP_FFT_LIB "MKL FFT" #define LMP_FFT_LIB "MKL FFT"
#elif defined(FFT_CUFFT) #elif defined(FFT_CUFFT)
#define LMP_FFT_LIB "cuFFT" #define LMP_FFT_LIB "cuFFT"
#elif defined(FFT_HIPFFT)
#define LMP_FFT_LIB "hipFFT"
#else #else
#define LMP_FFT_LIB "KISS FFT" #define LMP_FFT_LIB "KISS FFT"
#endif #endif