Merge branch 'develop' into collected-small-changes

This commit is contained in:
Axel Kohlmeyer
2022-05-06 11:45:51 -04:00
15 changed files with 454 additions and 179 deletions

View File

@ -947,6 +947,12 @@ if(PKG_KSPACE)
else()
message(STATUS "Kokkos FFT: cuFFT")
endif()
elseif(Kokkos_ENABLE_HIP)
if(FFT STREQUAL "KISS")
message(STATUS "Kokkos FFT: KISS")
else()
message(STATUS "Kokkos FFT: hipFFT")
endif()
else()
message(STATUS "Kokkos FFT: ${FFT}")
endif()

View File

@ -130,6 +130,11 @@ if(PKG_KSPACE)
target_compile_definitions(lammps PRIVATE -DFFT_CUFFT)
target_link_libraries(lammps PRIVATE cufft)
endif()
elseif(Kokkos_ENABLE_HIP)
if(NOT (FFT STREQUAL "KISS"))
target_compile_definitions(lammps PRIVATE -DFFT_HIPFFT)
target_link_libraries(lammps PRIVATE hipfft)
endif()
endif()
endif()

View File

@ -641,6 +641,20 @@ This list was last updated for version 3.5.0 of the Kokkos library.
-D CMAKE_CXX_COMPILER=${HOME}/lammps/lib/kokkos/bin/nvcc_wrapper
For AMD or NVIDIA GPUs using HIP, set these variables:
.. code-block:: bash
-D Kokkos_ARCH_HOSTARCH=yes # HOSTARCH = HOST from list above
-D Kokkos_ARCH_GPUARCH=yes # GPUARCH = GPU from list above
-D Kokkos_ENABLE_HIP=yes
-D Kokkos_ENABLE_OPENMP=yes
This will enable FFTs on the GPU, either by the internal KISSFFT library
or with the hipFFT wrapper library, which will call out to the
platform-appropriate vendor library: rocFFT on AMD GPUs or cuFFT on
NVIDIA GPUs.
To simplify compilation, four preset files are included in the
``cmake/presets`` folder, ``kokkos-serial.cmake``,
``kokkos-openmp.cmake``, ``kokkos-cuda.cmake``, and
@ -707,6 +721,15 @@ This list was last updated for version 3.5.0 of the Kokkos library.
KOKKOS_ABSOLUTE_PATH = $(shell cd $(KOKKOS_PATH); pwd)
CC = mpicxx -cxx=$(KOKKOS_ABSOLUTE_PATH)/config/nvcc_wrapper
For AMD or NVIDIA GPUs using HIP:
.. code-block:: make
KOKKOS_DEVICES = HIP
KOKKOS_ARCH = HOSTARCH,GPUARCH # HOSTARCH = HOST from list above that is hosting the GPU
# GPUARCH = GPU from list above
FFT_INC = -DFFT_HIPFFT # enable use of hipFFT (optional)
FFT_LIB = -lhipfft # link to hipFFT library
Advanced KOKKOS compilation settings
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

View File

@ -26,11 +26,11 @@ Syntax
region-ID = create atoms within this region, use NULL for entire simulation box
* zero or more keyword/value pairs may be appended
* keyword = *mol* or *basis* or *ratio* or *subset* or *remap* or *var* or *set* or *rotate* or *units*
* keyword = *mol* or *basis* or *ratio* or *subset* or *remap* or *var* or *set* or *rotate* or *overlap* or *maxtry* or *units*
.. parsed-literal::
*mol* value = template-ID seed
*mol* values = template-ID seed
template-ID = ID of molecule template specified in a separate :doc:`molecule <molecule>` command
seed = random # seed (positive integer)
*basis* values = M itype
@ -50,6 +50,10 @@ Syntax
*rotate* values = theta Rx Ry Rz
theta = rotation angle for single molecule (degrees)
Rx,Ry,Rz = rotation vector for single molecule
*overlap* value = Doverlap
Doverlap = only insert if at least this distance from all existing atoms
*maxtry* value = Ntry
Ntry = number of attempts to insert a particle before failure
*units* value = *lattice* or *box*
*lattice* = the geometry is defined in lattice units
*box* = the geometry is defined in simulation box units
@ -64,20 +68,22 @@ Examples
create_atoms 3 region regsphere basis 2 3 ratio 0.5 74637
create_atoms 3 single 0 0 5
create_atoms 1 box var v set x xpos set y ypos
create_atoms 2 random 50 12345 NULL overlap 2.0 maxtry 50
Description
"""""""""""
This command creates atoms (or molecules) on a lattice, or a single
atom (or molecule), or a random collection of atoms (or molecules), as
an alternative to reading in their coordinates explicitly via a
:doc:`read_data <read_data>` or :doc:`read_restart <read_restart>`
command. A simulation box must already exist, which is typically
created via the :doc:`create_box <create_box>` command. Before using
this command, a lattice must also be defined using the
:doc:`lattice <lattice>` command, unless you specify the *single* style
with units = box or the *random* style. For the remainder of this doc
page, a created atom or molecule is referred to as a "particle".
This command creates atoms (or molecules) within the simulation box,
either on a lattice, or a single atom (or molecule), or a random
collection of atoms (or molecules). It is an alternative to reading
in atom coordinates explicitly via a :doc:`read_data <read_data>` or
:doc:`read_restart <read_restart>` command. A simulation box must
already exist, which is typically created via the :doc:`create_box
<create_box>` command. Before using this command, a lattice must also
be defined using the :doc:`lattice <lattice>` command, unless you
specify the *single* style with units = box or the *random* style.
For the remainder of this doc page, a created atom or molecule is
referred to as a "particle".
If created particles are individual atoms, they are assigned the
specified atom *type*, though this can be altered via the *basis*
@ -97,58 +103,70 @@ particular dimension, LAMMPS is careful to put exactly one particle at
the boundary (on either side of the box), not zero or two.
For the *region* style, a geometric volume is filled with particles on
the lattice. This volume what is inside the simulation box and is
also consistent with the region volume. See the :doc:`region <region>`
command for details. Note that a region can be specified so that its
"volume" is either inside or outside a geometric boundary. Also note
that if your region is the same size as a periodic simulation box (in
some dimension), LAMMPS does not implement the same logic described
above as for the *box* style, to insure exactly one particle at
periodic boundaries. if this is what you desire, you should either
use the *box* style, or tweak the region size to get precisely the
particles you want.
the lattice. This volume is what is both inside the simulation box
and also consistent with the region volume. See the :doc:`region
<region>` command for details. Note that a region can be specified so
that its "volume" is either inside or outside its geometric boundary.
Also note that if a region is the same size as a periodic simulation
box (in some dimension), LAMMPS does NOT implement the same logic
described above for the *box* style, to insure exactly one particle at
periodic boundaries. If this is desired, you should either use the
*box* style, or tweak the region size to get precisely the particles
you want.
For the *single* style, a single particle is added to the system at
the specified coordinates. This can be useful for debugging purposes
or to create a tiny system with a handful of particles at specified
positions.
For the *random* style, N particles are added to the system at
For the *random* style, *N* particles are added to the system at
randomly generated coordinates, which can be useful for generating an
amorphous system. The particles are created one by one using the
specified random number *seed*, resulting in the same set of particles
specified random number *seed*, resulting in the same set of particle
coordinates, independent of how many processors are being used in the
simulation. If the *region-ID* argument is specified as NULL, then
the created particles will be anywhere in the simulation box. If a
simulation. Unless the *overlap* keyword is specified, particles
created by the *random* style will typically be highly overlapped.
Various additional criteria can be used to accept or reject a random
particle insertion; see the keyword discussion below. Multiple
attempts per particle are made (see the *maxtry* keyword) until the
insertion is either successful or fails. If this command fails to add
all requested *N* particles, a warning will be output.
If the *region-ID* argument is specified as NULL, then the randomly
created particles will be anywhere in the simulation box. If a
*region-ID* is specified, a geometric volume is filled which is both
inside the simulation box and is also consistent with the region
volume. See the :doc:`region <region>` command for details. Note that
a region can be specified so that its "volume" is either inside or
outside a geometric boundary.
volume. See the :doc:`region <region>` command for details. Note
that a region can be specified so that its "volume" is either inside
or outside its geometric boundary.
.. note::
Note that the create_atoms command adds particles to those that
already exist. This means it can be used to add particles to a system
previously read in from a data or restart file. Or the create_atoms
command can be used multiple times, to add multiple sets of particles
to the simulation. For example, grain boundaries can be created, by
interleaving the create_atoms command with :doc:`lattice <lattice>`
commands specifying different orientations.
Particles generated by the *random* style will typically be
highly overlapped which will cause many interatomic potentials to
compute large energies and forces. Thus you should either perform an
:doc:`energy minimization <minimize>` or run dynamics with :doc:`fix nve/limit <fix_nve_limit>` to equilibrate such a system, before
running normal dynamics.
When this command is used, care should be taken to insure the
resulting system does not contain particles which are highly
overlapped. Such overlaps will cause many interatomic potentials to
compute huge energies and forces, leading to bad dynamics. There are
several strategies to avoid this problem:
Note that this command adds particles to those that already exist.
This means it can be used to add particles to a system previously read
in from a data or restart file. Or the create_atoms command can be
used multiple times, to add multiple sets of particles to the
simulation. For example, grain boundaries can be created, by
interleaving create_atoms with :doc:`lattice <lattice>` commands
specifying different orientations. By using the create_atoms command
in conjunction with the :doc:`delete_atoms <delete_atoms>` command,
reasonably complex geometries can be created, or a protein can be
solvated with a surrounding box of water molecules.
* Use the :doc:`delete_atoms overlap <delete_atoms>` command after
create_atoms. For example, this strategy can be used to overlay and
surround a large protein molecule with a volume of water molecules,
then delete water molecules that overlap with the protein atoms.
In all these cases, care should be taken to insure that new atoms do
not overlap existing atoms inappropriately, especially if molecules
are being added. The :doc:`delete_atoms <delete_atoms>` command can be
used to remove overlapping atoms or molecules.
* For the *random* style, use the optional *overlap* keyword to avoid
overlaps when each new particle is created.
* Before running dynamics on an overlapped system, perform an
:doc:`energy minimization <minimize>`. Or run initial dynamics with
:doc:`pair_style soft <pair_soft>` or with :doc:`fix nve/limit
<fix_nve_limit>` to un-overlap the particles, before running normal
dynamics.
.. note::
@ -156,12 +174,13 @@ used to remove overlapping atoms or molecules.
that are outside the simulation box; they will just be ignored by
LAMMPS. This is true even if you are using shrink-wrapped box
boundaries, as specified by the :doc:`boundary <boundary>` command.
However, you can first use the :doc:`change_box <change_box>` command to
temporarily expand the box, then add atoms via create_atoms, then
finally use change_box command again if needed to re-shrink-wrap the
new atoms. See the :doc:`change_box <change_box>` page for an
example of how to do this, using the create_atoms *single* style to
insert a new atom outside the current simulation box.
However, you can first use the :doc:`change_box <change_box>`
command to temporarily expand the box, then add atoms via
create_atoms, then finally use change_box command again if needed
to re-shrink-wrap the new atoms. See the :doc:`change_box
<change_box>` doc page for an example of how to do this, using the
create_atoms *single* style to insert a new atom outside the
current simulation box.
----------
@ -180,17 +199,19 @@ Using a lattice to add molecules, e.g. via the *box* or *region* or
points, except that entire molecules are added at each point, i.e. on
the point defined by each basis atom in the unit cell as it tiles the
simulation box or region. This is done by placing the geometric
center of the molecule at the lattice point, and giving the molecule a
random orientation about the point. The random *seed* specified with
the *mol* keyword is used for this operation, and the random numbers
generated by each processor are different. This means the coordinates
of individual atoms (in the molecules) will be different when running
on different numbers of processors, unlike when atoms are being
created in parallel.
center of the molecule at the lattice point, and (by default) giving
the molecule a random orientation about the point. The random *seed*
specified with the *mol* keyword is used for this operation, and the
random numbers generated by each processor are different. This means
the coordinates of individual atoms (in the molecules) will be
different when running on different numbers of processors, unlike when
atoms are being created in parallel.
Also note that because of the random rotations, it may be important to
use a lattice with a large enough spacing that adjacent molecules will
not overlap, regardless of their relative orientations.
Note that with random rotations, it may be important to use a lattice
with a large enough spacing that adjacent molecules will not overlap,
regardless of their relative orientations. See the description of the
*rotate* keyword below, which overrides the default random orientation
and inserts all molecules at a specified orientation.
.. note::
@ -204,7 +225,7 @@ not overlap, regardless of their relative orientations.
----------
This is the meaning of the other allowed keywords.
This is the meaning of the other optional keywords.
The *basis* keyword is only used when atoms (not molecules) are being
created. It specifies an atom type that will be assigned to specific
@ -234,18 +255,24 @@ and no particle is created if its position is outside the box.
The *var* and *set* keywords can be used together to provide a
criterion for accepting or rejecting the addition of an individual
atom, based on its coordinates. The *name* specified for the *var*
keyword is the name of an :doc:`equal-style variable <variable>` which
should evaluate to a zero or non-zero value based on one or two or
three variables which will store the x, y, or z coordinates of an atom
(one variable per coordinate). If used, these other variables must be
:doc:`internal-style variables <variable>` defined in the input script;
their initial numeric value can be anything. They must be
atom, based on its coordinates. They apply to all styles except
*single*. The *name* specified for the *var* keyword is the name of
an :doc:`equal-style variable <variable>` which should evaluate to a
zero or non-zero value based on one or two or three variables which
will store the x, y, or z coordinates of an atom (one variable per
coordinate). If used, these other variables must be
:doc:`internal-style variables <variable>` defined in the input
script; their initial numeric value can be anything. They must be
internal-style variables, because this command resets their values
directly. The *set* keyword is used to identify the names of these
other variables, one variable for the x-coordinate of a created atom,
one for y, and one for z.
.. figure:: img/sinusoid.jpg
:figwidth: 50%
:align: right
:target: _images/sinusoid.jpg
When an atom is created, its x,y,z coordinates become the values for
any *set* variable that is defined. The *var* variable is then
evaluated. If the returned value is 0.0, the atom is not created. If
@ -259,28 +286,26 @@ the sinusoid would appear to be "smoother". Also note the use of the
"xlat" and "ylat" :doc:`thermo_style <thermo_style>` keywords which
converts lattice spacings to distance.
.. only:: html
(Click on the image for a larger version)
.. code-block:: LAMMPS
dimension 2
variable x equal 100
variable y equal 25
lattice hex 0.8442
region box block 0 $x 0 $y -0.5 0.5
create_box 1 box
dimension 2
variable x equal 100
variable y equal 25
lattice hex 0.8442
region box block 0 $x 0 $y -0.5 0.5
create_box 1 box
variable xx internal 0.0
variable yy internal 0.0
variable v equal "(0.2*v_y*ylat * cos(v_xx/xlat * 2.0*PI*4.0/v_x) + 0.5*v_y*ylat - v_yy) > 0.0"
create_atoms 1 box var v set x xx set y yy
write_dump all atom sinusoid.lammpstrj
variable xx internal 0.0
variable yy internal 0.0
variable v equal "(0.2*v_y*ylat * cos(v_xx/xlat * 2.0*PI*4.0/v_x) + 0.5*v_y*ylat - v_yy) > 0.0"
create_atoms 1 box var v set x xx set y yy
write_dump all atom sinusoid.lammpstrj
.. image:: img/sinusoid.jpg
:scale: 50%
:align: center
.. raw:: html
Click on the image for a larger version.
-----
The *rotate* keyword allows specification of the orientation
at which molecules are inserted. The axis of rotation is
@ -291,10 +316,72 @@ the atoms around the rotation axis is consistent with the right-hand
rule: if your right-hand's thumb points along *R*, then your fingers
wrap around the axis in the direction of rotation.
The *overlap* keyword only applies to the *random* style. It prevents
newly created particles from being created closer than the specified
*Doverlap* distance from any other particle. When the particles being
created are molecules, the radius of the molecule (from its geometric
center) is added to *Doverlap*. If particles have finite size (see
:doc:`atom_style sphere <atom_style>` for example) *Doverlap* should
be specified large enough to include the particle size in the
non-overlapping criterion.
.. note::
Checking for overlaps is a costly O(N(N+M)) operation for inserting
*N* new particles into a system with *M* existing particles. This
is because distances to all *M* existing particles are computed for
each new particle that is added. Thus the intended use of this
keyword is to add relatively small numbers of particles to systems
which remain at a relatively low density even after the new
particles are created. Careful use of the *maxtry* keyword in
combination with *overlap* is recommended. See the discussion
above about systems with overlapped particles for alternate
strategies that allow for overlapped insertions.
The *maxtry* keyword only applies to the *random* style. It limits
the number of attempts to generate valid coordinates for a single new
particle that satisfy all requirements imposed by the *region*, *var*,
and *overlap* keywords. The default is 10 attempts per particle
before the loop over the requested *N* particles advances to the next
particle. Note that if insertion success is unlikely (e.g. inserting
new particles into a dense system using the *overlap* keyword),
setting the *maxtry* keyword to a large value may result in this
command running for a long time.
.. figure:: img/overlap.png
:figwidth: 30%
:align: right
:target: _images/overlap.png
Here is an example for the *random* style using these commands
.. code-block:: LAMMPS
units lj
dimension 2
region box block 0 50 0 50 -0.5 0.5
create_box 1 box
create_atoms 1 random 2000 13487 NULL overlap 1.0 maxtry 50
pair_style lj/cut 2.5
pair_coeff 1 1 1.0 1.0 2.5
to produce a system as shown in the image with 1520 particles (out of
2000 requested) that are moderately dense and which have no overlaps
sufficient to prevent the LJ pair_style from running properly (because
the overlap criterion = 1.0). The create_atoms command ran for 0.3 s
on a single CPU core.
.. only:: html
(Click on the image for a larger version)
-----
The *units* keyword determines the meaning of the distance units used
to specify the coordinates of the one particle created by the *single*
style. A *box* value selects standard distance units as defined by
the :doc:`units <units>` command, e.g. Angstroms for units = real or
style, or the overlap distance *Doverlap* by the *overlap* keyword. A
*box* value selects standard distance units as defined by the
:doc:`units <units>` command, e.g. Angstroms for units = real or
metal. A *lattice* value means the distance units are in lattice
spacings.
@ -315,9 +402,10 @@ assigned to created molecules in a similar fashion.
Aside from their ID, atom type, and xyz position, other properties of
created atoms are set to default values, depending on which quantities
are defined by the chosen :doc:`atom style <atom_style>`. See the :doc:`atom style <atom_style>` command for more details. See the
:doc:`set <set>` and :doc:`velocity <velocity>` commands for info on how
to change these values.
are defined by the chosen :doc:`atom style <atom_style>`. See the
:doc:`atom style <atom_style>` command for more details. See the
:doc:`set <set>` and :doc:`velocity <velocity>` commands for info on
how to change these values.
* charge = 0.0
* dipole moment magnitude = 0.0
@ -373,4 +461,4 @@ Default
The default for the *basis* keyword is that all created atoms are
assigned the argument *type* as their atom type (when single atoms are
being created). The other defaults are *remap* = no, *rotate* =
random, and *units* = lattice.
random, *overlap* not checked, *maxtry* = 10, and *units* = lattice.

BIN
doc/src/img/overlap.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 572 KiB

View File

@ -1326,6 +1326,7 @@ hiID
Hijazi
Hilger
Hinestrosa
hipFFT
histo
histogrammed
histogramming
@ -1951,6 +1952,7 @@ maxsize
maxspecial
maxSteps
maxstrain
maxtry
maxwell
Maxwellian
maxX
@ -2951,6 +2953,7 @@ rnage
rng
rNEMD
ro
rocFFT
Rochus
Rockett
rocksalt

1
src/.gitignore vendored
View File

@ -627,6 +627,7 @@
/ewald.h
/ewald_cg.cpp
/ewald_cg.h
/ewald_const.h
/ewald_dipole.cpp
/ewald_dipole.h
/ewald_dipole_spin.cpp

View File

@ -46,13 +46,17 @@ FFT3dKokkos<DeviceType>::FFT3dKokkos(LAMMPS *lmp, MPI_Comm comm, int nfast, int
#if defined(FFT_MKL)
if (ngpus > 0 && execution_space == Device)
lmp->error->all(FLERR,"Cannot use the MKL library with Kokkos CUDA on GPUs");
lmp->error->all(FLERR,"Cannot use the MKL library with Kokkos on GPUs");
#elif defined(FFT_FFTW3)
if (ngpus > 0 && execution_space == Device)
lmp->error->all(FLERR,"Cannot use the FFTW library with Kokkos CUDA on GPUs");
lmp->error->all(FLERR,"Cannot use the FFTW library with Kokkos on GPUs");
#elif defined(FFT_CUFFT)
if (ngpus > 0 && execution_space == Host)
lmp->error->all(FLERR,"Cannot use the cuFFT library with Kokkos CUDA on the host CPUs");
lmp->error->all(FLERR,"Cannot use the cuFFT library with Kokkos on the host CPUs");
#elif defined(FFT_HIPFFT)
if (ngpus > 0 && execution_space == Host)
lmp->error->all(FLERR,"Cannot use the hipFFT library with Kokkos on the host CPUs");
#elif defined(FFT_KISSFFT)
// The compiler can't statically determine the stack size needed for
// recursive function calls in KISS FFT and the default per-thread
@ -145,7 +149,7 @@ public:
KOKKOS_INLINE_FUNCTION
void operator() (const int &i) const {
#if defined(FFT_FFTW3) || defined(FFT_CUFFT)
#if defined(FFT_FFTW3) || defined(FFT_CUFFT) || defined(FFT_HIPFFT)
FFT_SCALAR* out_ptr = (FFT_SCALAR *)(d_out.data()+i);
*(out_ptr++) *= norm;
*(out_ptr++) *= norm;
@ -227,6 +231,8 @@ void FFT3dKokkos<DeviceType>::fft_3d_kokkos(typename FFT_AT::t_FFT_DATA_1d d_in,
FFTW_API(execute_dft)(plan->plan_fast_backward,(FFT_DATA*)d_data.data(),(FFT_DATA*)d_data.data());
#elif defined(FFT_CUFFT)
cufftExec(plan->plan_fast,d_data.data(),d_data.data(),-flag);
#elif defined(FFT_HIPFFT)
hipfftExec(plan->plan_fast,d_data.data(),d_data.data(),-flag);
#else
typename FFT_AT::t_FFT_DATA_1d d_tmp =
typename FFT_AT::t_FFT_DATA_1d(Kokkos::view_alloc("fft_3d:tmp",Kokkos::WithoutInitializing),d_data.extent(0));
@ -271,6 +277,8 @@ void FFT3dKokkos<DeviceType>::fft_3d_kokkos(typename FFT_AT::t_FFT_DATA_1d d_in,
FFTW_API(execute_dft)(plan->plan_mid_backward,(FFT_DATA*)d_data.data(),(FFT_DATA*)d_data.data());
#elif defined(FFT_CUFFT)
cufftExec(plan->plan_mid,d_data.data(),d_data.data(),-flag);
#elif defined(FFT_HIPFFT)
hipfftExec(plan->plan_mid,d_data.data(),d_data.data(),-flag);
#else
d_tmp = typename FFT_AT::t_FFT_DATA_1d(Kokkos::view_alloc("fft_3d:tmp",Kokkos::WithoutInitializing),d_data.extent(0));
if (flag == 1)
@ -313,6 +321,8 @@ void FFT3dKokkos<DeviceType>::fft_3d_kokkos(typename FFT_AT::t_FFT_DATA_1d d_in,
FFTW_API(execute_dft)(plan->plan_slow_backward,(FFT_DATA*)d_data.data(),(FFT_DATA*)d_data.data());
#elif defined(FFT_CUFFT)
cufftExec(plan->plan_slow,d_data.data(),d_data.data(),-flag);
#elif defined(FFT_HIPFFT)
hipfftExec(plan->plan_slow,d_data.data(),d_data.data(),-flag);
#else
d_tmp = typename FFT_AT::t_FFT_DATA_1d(Kokkos::view_alloc("fft_3d:tmp",Kokkos::WithoutInitializing),d_data.extent(0));
if (flag == 1)
@ -699,6 +709,23 @@ struct fft_plan_3d_kokkos<DeviceType>* FFT3dKokkos<DeviceType>::fft_3d_create_pl
&nslow,1,plan->length3,
CUFFT_TYPE,plan->total3/plan->length3);
#elif defined(FFT_HIPFFT)
hipfftPlanMany(&(plan->plan_fast), 1, &nfast,
&nfast,1,plan->length1,
&nfast,1,plan->length1,
HIPFFT_TYPE,plan->total1/plan->length1);
hipfftPlanMany(&(plan->plan_mid), 1, &nmid,
&nmid,1,plan->length2,
&nmid,1,plan->length2,
HIPFFT_TYPE,plan->total2/plan->length2);
hipfftPlanMany(&(plan->plan_slow), 1, &nslow,
&nslow,1,plan->length3,
&nslow,1,plan->length3,
HIPFFT_TYPE,plan->total3/plan->length3);
#else /* FFT_KISS */
kissfftKK = new KissFFTKokkos<DeviceType>();
@ -863,6 +890,10 @@ void FFT3dKokkos<DeviceType>::fft_3d_1d_only_kokkos(typename FFT_AT::t_FFT_DATA_
cufftExec(plan->plan_fast,d_data.data(),d_data.data(),-flag);
cufftExec(plan->plan_mid,d_data.data(),d_data.data(),-flag);
cufftExec(plan->plan_slow,d_data.data(),d_data.data(),-flag);
#elif defined(FFT_HIPFFT)
hipfftExec(plan->plan_fast,d_data.data(),d_data.data(),-flag);
hipfftExec(plan->plan_mid,d_data.data(),d_data.data(),-flag);
hipfftExec(plan->plan_slow,d_data.data(),d_data.data(),-flag);
#else
kiss_fft_functor<DeviceType> f;
typename FFT_AT::t_FFT_DATA_1d d_tmp =

View File

@ -60,6 +60,10 @@ struct fft_plan_3d_kokkos {
cufftHandle plan_fast;
cufftHandle plan_mid;
cufftHandle plan_slow;
#elif defined(FFT_HIPFFT)
hipfftHandle plan_fast;
hipfftHandle plan_mid;
hipfftHandle plan_slow;
#else
kiss_fft_state_kokkos<DeviceType> cfg_fast_forward;
kiss_fft_state_kokkos<DeviceType> cfg_fast_backward;

View File

@ -36,8 +36,8 @@
#endif
// with KOKKOS in CUDA mode we can only have
// CUFFT or KISSFFT, thus undefine all other
// with KOKKOS in CUDA or HIP mode we can only have
// CUFFT/HIPFFT or KISSFFT, thus undefine all other
// FFTs here, since they may be valid in fft3d.cpp
#ifdef KOKKOS_ENABLE_CUDA
@ -53,10 +53,26 @@
# if !defined(FFT_CUFFT) && !defined(FFT_KISSFFT)
# define FFT_KISSFFT
# endif
#elif defined(KOKKOS_ENABLE_HIP)
# if defined(FFT_FFTW)
# undef FFT_FFTW
# endif
# if defined(FFT_FFTW3)
# undef FFT_FFTW3
# endif
# if defined(FFT_MKL)
# undef FFT_MKL
# endif
# if !defined(FFT_HIPFFT) && !defined(FFT_KISSFFT)
# define FFT_KISSFFT
# endif
#else
# if defined(FFT_CUFFT)
# error "Must enable CUDA with KOKKOS to use -DFFT_CUFFT"
# endif
# if defined(FFT_HIPFFT)
# error "Must enable HIP with KOKKOS to use -DFFT_HIPFFT"
# endif
// if user set FFTW, it means FFTW3
# ifdef FFT_FFTW
# define FFT_FFTW3
@ -97,6 +113,17 @@
#define CUFFT_TYPE CUFFT_Z2Z
typedef cufftDoubleComplex FFT_DATA;
#endif
#elif defined(FFT_HIPFFT)
#include "hipfft.h"
#if defined(FFT_SINGLE)
#define hipfftExec hipfftExecC2C
#define HIPFFT_TYPE HIPFFT_C2C
typedef hipfftComplex FFT_DATA;
#else
#define hipfftExec hipfftExecZ2Z
#define HIPFFT_TYPE HIPFFT_Z2Z
typedef hipfftDoubleComplex FFT_DATA;
#endif
#else
#if defined(FFT_SINGLE)
#define kiss_fft_scalar float

View File

@ -1,4 +1,4 @@
# crusher_kokkos = KOKKOS/HIP, AMD MI250X GPU and AMD EPYC 7A53 "Optimized 3rd Gen EPYC" CPU, Cray MPICH, hipcc compiler
# crusher_kokkos = KOKKOS/HIP, AMD MI250X GPU and AMD EPYC 7A53 "Optimized 3rd Gen EPYC" CPU, Cray MPICH, hipcc compiler, hipFFT
SHELL = /bin/sh
@ -54,9 +54,12 @@ MPI_LIB = -L${MPICH_DIR}/lib -lmpi -L${CRAY_MPICH_ROOTDIR}/gtl/lib -lmpi_gtl_hsa
# PATH = path for FFT library
# LIB = name of FFT library
FFT_INC =
MY_HIP_EXE = $(shell which hipcc)
MY_HIP_PATH = $(dir ${MY_HIP_EXE})
FFT_INC = -DFFT_HIPFFT
FFT_PATH =
FFT_LIB =
FFT_LIB = -L${MY_HIP_PATH}../lib -lhipfft
# JPEG and/or PNG library
# see discussion in Section 3.5.4 of manual

View File

@ -1,4 +1,4 @@
# spock_kokkos = KOKKOS/HIP, AMD MI100 GPU and AMD EPYC 7662 "Rome" CPU, Cray MPICH, hipcc compiler
# spock_kokkos = KOKKOS/HIP, AMD MI100 GPU and AMD EPYC 7662 "Rome" CPU, Cray MPICH, hipcc compiler, hipFFT
SHELL = /bin/sh
@ -54,9 +54,12 @@ MPI_LIB = -L${MPICH_DIR}/lib -lmpi -L${CRAY_MPICH_ROOTDIR}/gtl/lib -lmpi_gtl_hsa
# PATH = path for FFT library
# LIB = name of FFT library
FFT_INC =
MY_HIP_EXE = $(shell which hipcc)
MY_HIP_PATH = $(dir ${MY_HIP_EXE})
FFT_INC = -DFFT_HIPFFT
FFT_PATH =
FFT_LIB =
FFT_LIB = -L${MY_HIP_PATH}../lib -lhipfft
# JPEG and/or PNG library
# see discussion in Section 3.5.4 of manual

View File

@ -13,6 +13,7 @@
/* ----------------------------------------------------------------------
Contributing author (ratio and subset) : Jake Gissinger (U Colorado)
Contributing author (maxtry & overlap) : Eugen Rozic (IRB, Zagreb)
------------------------------------------------------------------------- */
#include "create_atoms.h"
@ -44,6 +45,7 @@ using namespace MathConst;
#define BIG 1.0e30
#define EPSILON 1.0e-6
#define LB_FACTOR 1.1
#define DEFAULT_MAXTRY 1000
enum { BOX, REGION, SINGLE, RANDOM };
enum { ATOM, MOLECULE };
@ -95,7 +97,6 @@ void CreateAtoms::command(int narg, char **arg)
region->init();
region->prematch();
iarg = 3;
;
} else if (strcmp(arg[1], "single") == 0) {
style = SINGLE;
if (narg < 5) error->all(FLERR, "Illegal create_atoms command");
@ -107,7 +108,9 @@ void CreateAtoms::command(int narg, char **arg)
style = RANDOM;
if (narg < 5) error->all(FLERR, "Illegal create_atoms command");
nrandom = utils::inumeric(FLERR, arg[2], false, lmp);
if (nrandom < 0) error->all(FLERR, "Illegal create_atoms command");
seed = utils::inumeric(FLERR, arg[3], false, lmp);
if (seed <= 0) error->all(FLERR, "Illegal create_atoms command");
if (strcmp(arg[4], "NULL") == 0)
region = nullptr;
else {
@ -126,11 +129,15 @@ void CreateAtoms::command(int narg, char **arg)
remapflag = 0;
mode = ATOM;
int molseed;
ranmol = nullptr;
varflag = 0;
vstr = xstr = ystr = zstr = nullptr;
quatone[0] = quatone[1] = quatone[2] = 0.0;
quat_user = 0;
quatone[0] = quatone[1] = quatone[2] = quatone[3] = 0.0;
subsetflag = NONE;
int subsetseed;
overlapflag = 0;
maxtry = DEFAULT_MAXTRY;
nbasis = domain->lattice->nbasis;
basistype = new int[nbasis];
@ -172,6 +179,8 @@ void CreateAtoms::command(int narg, char **arg)
error->all(FLERR, "Illegal create_atoms command");
iarg += 2;
} else if (strcmp(arg[iarg], "var") == 0) {
if (style == SINGLE) error->all(FLERR, "Illegal create_atoms command: "
"can't combine 'var' keyword with 'single' style!");
if (iarg + 2 > narg) error->all(FLERR, "Illegal create_atoms command");
delete[] vstr;
vstr = utils::strdup(arg[iarg + 1]);
@ -193,10 +202,10 @@ void CreateAtoms::command(int narg, char **arg)
iarg += 3;
} else if (strcmp(arg[iarg], "rotate") == 0) {
if (iarg + 5 > narg) error->all(FLERR, "Illegal create_atoms command");
quat_user = 1;
double thetaone;
double axisone[3];
thetaone = utils::numeric(FLERR, arg[iarg + 1], false, lmp) / 180.0 * MY_PI;
;
axisone[0] = utils::numeric(FLERR, arg[iarg + 2], false, lmp);
axisone[1] = utils::numeric(FLERR, arg[iarg + 3], false, lmp);
axisone[2] = utils::numeric(FLERR, arg[iarg + 4], false, lmp);
@ -222,38 +231,46 @@ void CreateAtoms::command(int narg, char **arg)
subsetseed = utils::inumeric(FLERR, arg[iarg + 2], false, lmp);
if (nsubset <= 0 || subsetseed <= 0) error->all(FLERR, "Illegal create_atoms command");
iarg += 3;
} else if (strcmp(arg[iarg], "overlap") == 0) {
if (style != RANDOM)
error->all(FLERR, "Create_atoms overlap can only be used with random style");
if (iarg + 2 > narg) error->all(FLERR, "Illegal create_atoms command");
overlap = utils::numeric(FLERR, arg[iarg + 1], false, lmp);
if (overlap <= 0) error->all(FLERR, "Illegal create_atoms command");
overlapflag = 1;
iarg += 2;
} else if (strcmp(arg[iarg], "maxtry") == 0) {
if (style != RANDOM)
error->all(FLERR, "Create_atoms maxtry can only be used with random style");
if (iarg + 2 > narg) error->all(FLERR, "Illegal create_atoms command");
maxtry = utils::inumeric(FLERR, arg[iarg + 1], false, lmp);
if (maxtry <= 0) error->all(FLERR,"Illegal create_atoms command");
iarg += 2;
} else
error->all(FLERR, "Illegal create_atoms command");
}
// error checks
// error checks and further setup for mode = MOLECULE
if (mode == ATOM && (ntype <= 0 || ntype > atom->ntypes))
error->all(FLERR, "Invalid atom type in create_atoms command");
if (style == RANDOM) {
if (nrandom < 0) error->all(FLERR, "Illegal create_atoms command");
if (seed <= 0) error->all(FLERR, "Illegal create_atoms command");
}
// error check and further setup for mode = MOLECULE
ranmol = nullptr;
if (mode == MOLECULE) {
if (onemol->xflag == 0) error->all(FLERR, "Create_atoms molecule must have coordinates");
if (onemol->typeflag == 0) error->all(FLERR, "Create_atoms molecule must have atom types");
if (mode == ATOM) {
if (ntype <= 0 || ntype > atom->ntypes)
error->all(FLERR, "Invalid atom type in create_atoms command");
} else if (mode == MOLECULE) {
if (onemol->xflag == 0)
error->all(FLERR, "Create_atoms molecule must have coordinates");
if (onemol->typeflag == 0)
error->all(FLERR, "Create_atoms molecule must have atom types");
if (ntype + onemol->ntypes <= 0 || ntype + onemol->ntypes > atom->ntypes)
error->all(FLERR, "Invalid atom type in create_atoms mol command");
if (onemol->tag_require && !atom->tag_enable)
error->all(FLERR, "Create_atoms molecule has atom IDs, but system does not");
onemol->check_attributes(0);
// create_atoms uses geoemetric center of molecule for insertion
onemol->compute_center();
// use geometric center of molecule for insertion
// molecule random number generator, different for each proc
onemol->compute_center();
ranmol = new RanMars(lmp, molseed + comm->me);
}
@ -305,6 +322,7 @@ void CreateAtoms::command(int narg, char **arg)
xone[0] *= domain->lattice->xlattice;
xone[1] *= domain->lattice->ylattice;
xone[2] *= domain->lattice->zlattice;
overlap *= domain->lattice->xlattice;
}
// set bounds for my proc in sublo[3] & subhi[3]
@ -376,7 +394,7 @@ void CreateAtoms::command(int narg, char **arg)
}
}
// Record wall time for atom creation
// record wall time for atom creation
MPI_Barrier(world);
double time1 = platform::walltime();
@ -630,12 +648,11 @@ void CreateAtoms::add_single()
if (coord[0] >= sublo[0] && coord[0] < subhi[0] && coord[1] >= sublo[1] && coord[1] < subhi[1] &&
coord[2] >= sublo[2] && coord[2] < subhi[2]) {
if (mode == ATOM)
if (mode == ATOM) {
atom->avec->create_atom(ntype, xone);
else if (quatone[0] == 0.0 && quatone[1] == 0.0 && quatone[2] == 0.0)
} else {
add_molecule(xone);
else
add_molecule(xone, quatone);
}
}
}
@ -646,9 +663,16 @@ void CreateAtoms::add_single()
void CreateAtoms::add_random()
{
double xlo, ylo, zlo, xhi, yhi, zhi, zmid;
double delx, dely, delz, distsq, odistsq;
double lamda[3], *coord;
double *boxlo, *boxhi;
if (overlapflag) {
double odist = overlap;
if (mode == MOLECULE) odist += onemol->molradius;
odistsq = odist*odist;
}
// random number generator, same for all procs
// warm up the generator 30x to avoid correlations in first-particle
// positions if runs are repeated with consecutive seeds
@ -689,49 +713,101 @@ void CreateAtoms::add_random()
zhi = MIN(zhi, region->extent_zhi);
}
// generate random positions for each new atom/molecule within bounding box
// iterate until atom is within region, variable, and triclinic simulation box
// if final atom position is in my subbox, create it
if (xlo > xhi || ylo > yhi || zlo > zhi)
error->all(FLERR, "No overlap of box and region for create_atoms");
int valid;
// insert Nrandom new atom/molecule into simulation box
int ntry,success;
int ninsert = 0;
for (int i = 0; i < nrandom; i++) {
while (true) {
// attempt to insert an atom/molecule up to maxtry times
// criteria for insertion: region, variable, triclinic box, overlap
success = 0;
ntry = 0;
while (ntry < maxtry) {
ntry++;
xone[0] = xlo + random->uniform() * (xhi - xlo);
xone[1] = ylo + random->uniform() * (yhi - ylo);
xone[2] = zlo + random->uniform() * (zhi - zlo);
if (domain->dimension == 2) xone[2] = zmid;
valid = 1;
if (region && (region->match(xone[0], xone[1], xone[2]) == 0)) valid = 0;
if (varflag && vartest(xone) == 0) valid = 0;
if (region && (region->match(xone[0], xone[1], xone[2]) == 0)) continue;
if (varflag && vartest(xone) == 0) continue;
if (triclinic) {
domain->x2lamda(xone, lamda);
coord = lamda;
if (coord[0] < boxlo[0] || coord[0] >= boxhi[0] || coord[1] < boxlo[1] ||
coord[1] >= boxhi[1] || coord[2] < boxlo[2] || coord[2] >= boxhi[2])
valid = 0;
} else
continue;
} else {
coord = xone;
}
if (valid) break;
// check for overlap of new atom/mol with all other atoms
// including prior insertions
// minimum_image() needed to account for distances across PBC
// new molecule only checks its center pt against others
// odistsq is expanded for mode=MOLECULE to account for molecule size
if (overlapflag) {
double **x = atom->x;
int nlocal = atom->nlocal;
int reject = 0;
for (int i = 0; i < nlocal; i++) {
delx = xone[0] - x[i][0];
dely = xone[1] - x[i][1];
delz = xone[2] - x[i][2];
domain->minimum_image(delx, dely, delz);
distsq = delx*delx + dely*dely + delz*delz;
if (distsq < odistsq) {
reject = 1;
break;
}
}
int reject_any;
MPI_Allreduce(&reject, &reject_any, 1, MPI_INT, MPI_MAX, world);
if (reject_any) continue;
}
// all tests passed
success = 1;
break;
}
// insertion failed, advance to next atom/molecule
if (!success) continue;
// insertion criteria were met
// if final atom position is in my subbox, create it
// if triclinic, coord is now in lamda units
ninsert++;
if (coord[0] >= sublo[0] && coord[0] < subhi[0] && coord[1] >= sublo[1] &&
coord[1] < subhi[1] && coord[2] >= sublo[2] && coord[2] < subhi[2]) {
if (mode == ATOM)
if (mode == ATOM) {
atom->avec->create_atom(ntype, xone);
else if (quatone[0] == 0 && quatone[1] == 0 && quatone[2] == 0)
} else {
add_molecule(xone);
else
add_molecule(xone, quatone);
}
}
}
// warn if did not successfully insert Nrandom atoms/molecules
if (ninsert < nrandom && comm->me == 0)
error->warning(FLERR, "Only inserted {} particles out of {}", ninsert, nrandom);
// clean-up
delete random;
@ -785,6 +861,7 @@ void CreateAtoms::add_lattice()
xmax = ymax = zmax = -BIG;
// convert to lattice coordinates and set bounding box
domain->lattice->bbox(1, bboxlo[0], bboxlo[1], bboxlo[2], xmin, ymin, zmin, xmax, ymax, zmax);
domain->lattice->bbox(1, bboxhi[0], bboxlo[1], bboxlo[2], xmin, ymin, zmin, xmax, ymax, zmax);
domain->lattice->bbox(1, bboxlo[0], bboxhi[1], bboxlo[2], xmin, ymin, zmin, xmax, ymax, zmax);
@ -905,7 +982,8 @@ void CreateAtoms::loop_lattice(int action)
// if variable test specified, eval variable
if (varflag && vartest(x) == 0) continue;
if (varflag && vartest(x) == 0)
continue;
// test if atom/molecule position is in my subbox
@ -924,21 +1002,19 @@ void CreateAtoms::loop_lattice(int action)
// add = add an atom or entire molecule to my list of atoms
if (action == INSERT) {
if (mode == ATOM)
if (mode == ATOM) {
atom->avec->create_atom(basistype[m], x);
else if (quatone[0] == 0 && quatone[1] == 0 && quatone[2] == 0)
} else {
add_molecule(x);
else
add_molecule(x, quatone);
}
} else if (action == COUNT) {
if (nlatt == MAXSMALLINT) nlatt_overflow = 1;
} else if (action == INSERT_SELECTED && flag[nlatt]) {
if (mode == ATOM)
if (mode == ATOM) {
atom->avec->create_atom(basistype[m], x);
else if (quatone[0] == 0 && quatone[1] == 0 && quatone[2] == 0)
} else {
add_molecule(x);
else
add_molecule(x, quatone);
}
}
nlatt++;
@ -949,43 +1025,42 @@ void CreateAtoms::loop_lattice(int action)
}
/* ----------------------------------------------------------------------
add a randomly rotated molecule with its center at center
if quat_user set, perform requested rotation
add a molecule with its center at center
------------------------------------------------------------------------- */
void CreateAtoms::add_molecule(double *center, double *quat_user)
void CreateAtoms::add_molecule(double *center)
{
int n;
double r[3], rotmat[3][3], quat[4], xnew[3];
double r[3], rotmat[3][3];
if (quat_user) {
quat[0] = quat_user[0];
quat[1] = quat_user[1];
quat[2] = quat_user[2];
quat[3] = quat_user[3];
} else {
// use quatone as-is if user set it
// else generate random quaternion in quatone
if (!quat_user) {
if (domain->dimension == 3) {
r[0] = ranmol->uniform() - 0.5;
r[1] = ranmol->uniform() - 0.5;
r[2] = ranmol->uniform() - 0.5;
MathExtra::norm3(r);
} else {
r[0] = r[1] = 0.0;
r[2] = 1.0;
}
MathExtra::norm3(r);
double theta = ranmol->uniform() * MY_2PI;
MathExtra::axisangle_to_quat(r, theta, quat);
MathExtra::axisangle_to_quat(r, theta, quatone);
}
MathExtra::quat_to_mat(quat, rotmat);
onemol->quat_external = quat;
MathExtra::quat_to_mat(quatone, rotmat);
// create atoms in molecule with atom ID = 0 and mol ID = 0
// reset in caller after all molecules created by all procs
// IDs are reset in caller after all molecules created by all procs
// pass add_molecule_atom an offset of 0 since don't know
// max tag of atoms in previous molecules at this point
// onemol->quat_external is used by atom->add_moleclue_atom()
int natoms = onemol->natoms;
onemol->quat_external = quatone;
int n, natoms = onemol->natoms;
double xnew[3];
for (int m = 0; m < natoms; m++) {
MathExtra::matvec(rotmat, onemol->dx[m], xnew);
MathExtra::add3(xnew, center, xnew);

View File

@ -32,6 +32,10 @@ class CreateAtoms : public Command {
private:
int ntype, style, mode, nbasis, nrandom, seed;
int remapflag;
int maxtry;
int quat_user;
int overlapflag;
double overlap;
int subsetflag;
bigint nsubset;
double subsetfrac;
@ -62,7 +66,7 @@ class CreateAtoms : public Command {
void add_random();
void add_lattice();
void loop_lattice(int);
void add_molecule(double *, double * = nullptr);
void add_molecule(double *);
int vartest(double *); // evaluate a variable with new atom position
};

View File

@ -32,6 +32,8 @@
#define LMP_FFT_LIB "MKL FFT"
#elif defined(FFT_CUFFT)
#define LMP_FFT_LIB "cuFFT"
#elif defined(FFT_HIPFFT)
#define LMP_FFT_LIB "hipFFT"
#else
#define LMP_FFT_LIB "KISS FFT"
#endif