diff --git a/cmake/CMakeLists.txt b/cmake/CMakeLists.txt index 18afe550b9..3aaa38ca13 100644 --- a/cmake/CMakeLists.txt +++ b/cmake/CMakeLists.txt @@ -947,6 +947,12 @@ if(PKG_KSPACE) else() message(STATUS "Kokkos FFT: cuFFT") endif() + elseif(Kokkos_ENABLE_HIP) + if(FFT STREQUAL "KISS") + message(STATUS "Kokkos FFT: KISS") + else() + message(STATUS "Kokkos FFT: hipFFT") + endif() else() message(STATUS "Kokkos FFT: ${FFT}") endif() diff --git a/cmake/Modules/Packages/KOKKOS.cmake b/cmake/Modules/Packages/KOKKOS.cmake index 4e35e6dcc0..6fa5892e78 100644 --- a/cmake/Modules/Packages/KOKKOS.cmake +++ b/cmake/Modules/Packages/KOKKOS.cmake @@ -130,6 +130,11 @@ if(PKG_KSPACE) target_compile_definitions(lammps PRIVATE -DFFT_CUFFT) target_link_libraries(lammps PRIVATE cufft) endif() + elseif(Kokkos_ENABLE_HIP) + if(NOT (FFT STREQUAL "KISS")) + target_compile_definitions(lammps PRIVATE -DFFT_HIPFFT) + target_link_libraries(lammps PRIVATE hipfft) + endif() endif() endif() diff --git a/doc/src/Build_extras.rst b/doc/src/Build_extras.rst index 8caf043c42..261fb17b3c 100644 --- a/doc/src/Build_extras.rst +++ b/doc/src/Build_extras.rst @@ -641,6 +641,20 @@ This list was last updated for version 3.5.0 of the Kokkos library. -D CMAKE_CXX_COMPILER=${HOME}/lammps/lib/kokkos/bin/nvcc_wrapper + For AMD or NVIDIA GPUs using HIP, set these variables: + + .. code-block:: bash + + -D Kokkos_ARCH_HOSTARCH=yes # HOSTARCH = HOST from list above + -D Kokkos_ARCH_GPUARCH=yes # GPUARCH = GPU from list above + -D Kokkos_ENABLE_HIP=yes + -D Kokkos_ENABLE_OPENMP=yes + + This will enable FFTs on the GPU, either by the internal KISSFFT library + or with the hipFFT wrapper library, which will call out to the + platform-appropriate vendor library: rocFFT on AMD GPUs or cuFFT on + NVIDIA GPUs. + To simplify compilation, four preset files are included in the ``cmake/presets`` folder, ``kokkos-serial.cmake``, ``kokkos-openmp.cmake``, ``kokkos-cuda.cmake``, and @@ -707,6 +721,15 @@ This list was last updated for version 3.5.0 of the Kokkos library. KOKKOS_ABSOLUTE_PATH = $(shell cd $(KOKKOS_PATH); pwd) CC = mpicxx -cxx=$(KOKKOS_ABSOLUTE_PATH)/config/nvcc_wrapper + For AMD or NVIDIA GPUs using HIP: + + .. code-block:: make + + KOKKOS_DEVICES = HIP + KOKKOS_ARCH = HOSTARCH,GPUARCH # HOSTARCH = HOST from list above that is hosting the GPU + # GPUARCH = GPU from list above + FFT_INC = -DFFT_HIPFFT # enable use of hipFFT (optional) + FFT_LIB = -lhipfft # link to hipFFT library Advanced KOKKOS compilation settings ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/doc/src/create_atoms.rst b/doc/src/create_atoms.rst index d10498baf2..84d176c38f 100644 --- a/doc/src/create_atoms.rst +++ b/doc/src/create_atoms.rst @@ -26,11 +26,11 @@ Syntax region-ID = create atoms within this region, use NULL for entire simulation box * zero or more keyword/value pairs may be appended -* keyword = *mol* or *basis* or *ratio* or *subset* or *remap* or *var* or *set* or *rotate* or *units* +* keyword = *mol* or *basis* or *ratio* or *subset* or *remap* or *var* or *set* or *rotate* or *overlap* or *maxtry* or *units* .. parsed-literal:: - *mol* value = template-ID seed + *mol* values = template-ID seed template-ID = ID of molecule template specified in a separate :doc:`molecule ` command seed = random # seed (positive integer) *basis* values = M itype @@ -50,6 +50,10 @@ Syntax *rotate* values = theta Rx Ry Rz theta = rotation angle for single molecule (degrees) Rx,Ry,Rz = rotation vector for single molecule + *overlap* value = Doverlap + Doverlap = only insert if at least this distance from all existing atoms + *maxtry* value = Ntry + Ntry = number of attempts to insert a particle before failure *units* value = *lattice* or *box* *lattice* = the geometry is defined in lattice units *box* = the geometry is defined in simulation box units @@ -64,20 +68,22 @@ Examples create_atoms 3 region regsphere basis 2 3 ratio 0.5 74637 create_atoms 3 single 0 0 5 create_atoms 1 box var v set x xpos set y ypos + create_atoms 2 random 50 12345 NULL overlap 2.0 maxtry 50 Description """"""""""" -This command creates atoms (or molecules) on a lattice, or a single -atom (or molecule), or a random collection of atoms (or molecules), as -an alternative to reading in their coordinates explicitly via a -:doc:`read_data ` or :doc:`read_restart ` -command. A simulation box must already exist, which is typically -created via the :doc:`create_box ` command. Before using -this command, a lattice must also be defined using the -:doc:`lattice ` command, unless you specify the *single* style -with units = box or the *random* style. For the remainder of this doc -page, a created atom or molecule is referred to as a "particle". +This command creates atoms (or molecules) within the simulation box, +either on a lattice, or a single atom (or molecule), or a random +collection of atoms (or molecules). It is an alternative to reading +in atom coordinates explicitly via a :doc:`read_data ` or +:doc:`read_restart ` command. A simulation box must +already exist, which is typically created via the :doc:`create_box +` command. Before using this command, a lattice must also +be defined using the :doc:`lattice ` command, unless you +specify the *single* style with units = box or the *random* style. +For the remainder of this doc page, a created atom or molecule is +referred to as a "particle". If created particles are individual atoms, they are assigned the specified atom *type*, though this can be altered via the *basis* @@ -97,58 +103,70 @@ particular dimension, LAMMPS is careful to put exactly one particle at the boundary (on either side of the box), not zero or two. For the *region* style, a geometric volume is filled with particles on -the lattice. This volume what is inside the simulation box and is -also consistent with the region volume. See the :doc:`region ` -command for details. Note that a region can be specified so that its -"volume" is either inside or outside a geometric boundary. Also note -that if your region is the same size as a periodic simulation box (in -some dimension), LAMMPS does not implement the same logic described -above as for the *box* style, to insure exactly one particle at -periodic boundaries. if this is what you desire, you should either -use the *box* style, or tweak the region size to get precisely the -particles you want. +the lattice. This volume is what is both inside the simulation box +and also consistent with the region volume. See the :doc:`region +` command for details. Note that a region can be specified so +that its "volume" is either inside or outside its geometric boundary. +Also note that if a region is the same size as a periodic simulation +box (in some dimension), LAMMPS does NOT implement the same logic +described above for the *box* style, to insure exactly one particle at +periodic boundaries. If this is desired, you should either use the +*box* style, or tweak the region size to get precisely the particles +you want. For the *single* style, a single particle is added to the system at the specified coordinates. This can be useful for debugging purposes or to create a tiny system with a handful of particles at specified positions. -For the *random* style, N particles are added to the system at +For the *random* style, *N* particles are added to the system at randomly generated coordinates, which can be useful for generating an amorphous system. The particles are created one by one using the -specified random number *seed*, resulting in the same set of particles +specified random number *seed*, resulting in the same set of particle coordinates, independent of how many processors are being used in the -simulation. If the *region-ID* argument is specified as NULL, then -the created particles will be anywhere in the simulation box. If a +simulation. Unless the *overlap* keyword is specified, particles +created by the *random* style will typically be highly overlapped. +Various additional criteria can be used to accept or reject a random +particle insertion; see the keyword discussion below. Multiple +attempts per particle are made (see the *maxtry* keyword) until the +insertion is either successful or fails. If this command fails to add +all requested *N* particles, a warning will be output. + +If the *region-ID* argument is specified as NULL, then the randomly +created particles will be anywhere in the simulation box. If a *region-ID* is specified, a geometric volume is filled which is both inside the simulation box and is also consistent with the region -volume. See the :doc:`region ` command for details. Note that -a region can be specified so that its "volume" is either inside or -outside a geometric boundary. +volume. See the :doc:`region ` command for details. Note +that a region can be specified so that its "volume" is either inside +or outside its geometric boundary. -.. note:: +Note that the create_atoms command adds particles to those that +already exist. This means it can be used to add particles to a system +previously read in from a data or restart file. Or the create_atoms +command can be used multiple times, to add multiple sets of particles +to the simulation. For example, grain boundaries can be created, by +interleaving the create_atoms command with :doc:`lattice ` +commands specifying different orientations. - Particles generated by the *random* style will typically be - highly overlapped which will cause many interatomic potentials to - compute large energies and forces. Thus you should either perform an - :doc:`energy minimization ` or run dynamics with :doc:`fix nve/limit ` to equilibrate such a system, before - running normal dynamics. +When this command is used, care should be taken to insure the +resulting system does not contain particles which are highly +overlapped. Such overlaps will cause many interatomic potentials to +compute huge energies and forces, leading to bad dynamics. There are +several strategies to avoid this problem: -Note that this command adds particles to those that already exist. -This means it can be used to add particles to a system previously read -in from a data or restart file. Or the create_atoms command can be -used multiple times, to add multiple sets of particles to the -simulation. For example, grain boundaries can be created, by -interleaving create_atoms with :doc:`lattice ` commands -specifying different orientations. By using the create_atoms command -in conjunction with the :doc:`delete_atoms ` command, -reasonably complex geometries can be created, or a protein can be -solvated with a surrounding box of water molecules. +* Use the :doc:`delete_atoms overlap ` command after + create_atoms. For example, this strategy can be used to overlay and + surround a large protein molecule with a volume of water molecules, + then delete water molecules that overlap with the protein atoms. -In all these cases, care should be taken to insure that new atoms do -not overlap existing atoms inappropriately, especially if molecules -are being added. The :doc:`delete_atoms ` command can be -used to remove overlapping atoms or molecules. +* For the *random* style, use the optional *overlap* keyword to avoid + overlaps when each new particle is created. + +* Before running dynamics on an overlapped system, perform an + :doc:`energy minimization `. Or run initial dynamics with + :doc:`pair_style soft ` or with :doc:`fix nve/limit + ` to un-overlap the particles, before running normal + dynamics. .. note:: @@ -156,12 +174,13 @@ used to remove overlapping atoms or molecules. that are outside the simulation box; they will just be ignored by LAMMPS. This is true even if you are using shrink-wrapped box boundaries, as specified by the :doc:`boundary ` command. - However, you can first use the :doc:`change_box ` command to - temporarily expand the box, then add atoms via create_atoms, then - finally use change_box command again if needed to re-shrink-wrap the - new atoms. See the :doc:`change_box ` page for an - example of how to do this, using the create_atoms *single* style to - insert a new atom outside the current simulation box. + However, you can first use the :doc:`change_box ` + command to temporarily expand the box, then add atoms via + create_atoms, then finally use change_box command again if needed + to re-shrink-wrap the new atoms. See the :doc:`change_box + ` doc page for an example of how to do this, using the + create_atoms *single* style to insert a new atom outside the + current simulation box. ---------- @@ -180,17 +199,19 @@ Using a lattice to add molecules, e.g. via the *box* or *region* or points, except that entire molecules are added at each point, i.e. on the point defined by each basis atom in the unit cell as it tiles the simulation box or region. This is done by placing the geometric -center of the molecule at the lattice point, and giving the molecule a -random orientation about the point. The random *seed* specified with -the *mol* keyword is used for this operation, and the random numbers -generated by each processor are different. This means the coordinates -of individual atoms (in the molecules) will be different when running -on different numbers of processors, unlike when atoms are being -created in parallel. +center of the molecule at the lattice point, and (by default) giving +the molecule a random orientation about the point. The random *seed* +specified with the *mol* keyword is used for this operation, and the +random numbers generated by each processor are different. This means +the coordinates of individual atoms (in the molecules) will be +different when running on different numbers of processors, unlike when +atoms are being created in parallel. -Also note that because of the random rotations, it may be important to -use a lattice with a large enough spacing that adjacent molecules will -not overlap, regardless of their relative orientations. +Note that with random rotations, it may be important to use a lattice +with a large enough spacing that adjacent molecules will not overlap, +regardless of their relative orientations. See the description of the +*rotate* keyword below, which overrides the default random orientation +and inserts all molecules at a specified orientation. .. note:: @@ -204,7 +225,7 @@ not overlap, regardless of their relative orientations. ---------- -This is the meaning of the other allowed keywords. +This is the meaning of the other optional keywords. The *basis* keyword is only used when atoms (not molecules) are being created. It specifies an atom type that will be assigned to specific @@ -234,18 +255,24 @@ and no particle is created if its position is outside the box. The *var* and *set* keywords can be used together to provide a criterion for accepting or rejecting the addition of an individual -atom, based on its coordinates. The *name* specified for the *var* -keyword is the name of an :doc:`equal-style variable ` which -should evaluate to a zero or non-zero value based on one or two or -three variables which will store the x, y, or z coordinates of an atom -(one variable per coordinate). If used, these other variables must be -:doc:`internal-style variables ` defined in the input script; -their initial numeric value can be anything. They must be +atom, based on its coordinates. They apply to all styles except +*single*. The *name* specified for the *var* keyword is the name of +an :doc:`equal-style variable ` which should evaluate to a +zero or non-zero value based on one or two or three variables which +will store the x, y, or z coordinates of an atom (one variable per +coordinate). If used, these other variables must be +:doc:`internal-style variables ` defined in the input +script; their initial numeric value can be anything. They must be internal-style variables, because this command resets their values directly. The *set* keyword is used to identify the names of these other variables, one variable for the x-coordinate of a created atom, one for y, and one for z. +.. figure:: img/sinusoid.jpg + :figwidth: 50% + :align: right + :target: _images/sinusoid.jpg + When an atom is created, its x,y,z coordinates become the values for any *set* variable that is defined. The *var* variable is then evaluated. If the returned value is 0.0, the atom is not created. If @@ -259,28 +286,26 @@ the sinusoid would appear to be "smoother". Also note the use of the "xlat" and "ylat" :doc:`thermo_style ` keywords which converts lattice spacings to distance. +.. only:: html + + (Click on the image for a larger version) + .. code-block:: LAMMPS - dimension 2 - variable x equal 100 - variable y equal 25 - lattice hex 0.8442 - region box block 0 $x 0 $y -0.5 0.5 - create_box 1 box + dimension 2 + variable x equal 100 + variable y equal 25 + lattice hex 0.8442 + region box block 0 $x 0 $y -0.5 0.5 + create_box 1 box - variable xx internal 0.0 - variable yy internal 0.0 - variable v equal "(0.2*v_y*ylat * cos(v_xx/xlat * 2.0*PI*4.0/v_x) + 0.5*v_y*ylat - v_yy) > 0.0" - create_atoms 1 box var v set x xx set y yy - write_dump all atom sinusoid.lammpstrj + variable xx internal 0.0 + variable yy internal 0.0 + variable v equal "(0.2*v_y*ylat * cos(v_xx/xlat * 2.0*PI*4.0/v_x) + 0.5*v_y*ylat - v_yy) > 0.0" + create_atoms 1 box var v set x xx set y yy + write_dump all atom sinusoid.lammpstrj -.. image:: img/sinusoid.jpg - :scale: 50% - :align: center - -.. raw:: html - - Click on the image for a larger version. +----- The *rotate* keyword allows specification of the orientation at which molecules are inserted. The axis of rotation is @@ -291,10 +316,72 @@ the atoms around the rotation axis is consistent with the right-hand rule: if your right-hand's thumb points along *R*, then your fingers wrap around the axis in the direction of rotation. +The *overlap* keyword only applies to the *random* style. It prevents +newly created particles from being created closer than the specified +*Doverlap* distance from any other particle. When the particles being +created are molecules, the radius of the molecule (from its geometric +center) is added to *Doverlap*. If particles have finite size (see +:doc:`atom_style sphere ` for example) *Doverlap* should +be specified large enough to include the particle size in the +non-overlapping criterion. + +.. note:: + + Checking for overlaps is a costly O(N(N+M)) operation for inserting + *N* new particles into a system with *M* existing particles. This + is because distances to all *M* existing particles are computed for + each new particle that is added. Thus the intended use of this + keyword is to add relatively small numbers of particles to systems + which remain at a relatively low density even after the new + particles are created. Careful use of the *maxtry* keyword in + combination with *overlap* is recommended. See the discussion + above about systems with overlapped particles for alternate + strategies that allow for overlapped insertions. + +The *maxtry* keyword only applies to the *random* style. It limits +the number of attempts to generate valid coordinates for a single new +particle that satisfy all requirements imposed by the *region*, *var*, +and *overlap* keywords. The default is 10 attempts per particle +before the loop over the requested *N* particles advances to the next +particle. Note that if insertion success is unlikely (e.g. inserting +new particles into a dense system using the *overlap* keyword), +setting the *maxtry* keyword to a large value may result in this +command running for a long time. + +.. figure:: img/overlap.png + :figwidth: 30% + :align: right + :target: _images/overlap.png + +Here is an example for the *random* style using these commands + +.. code-block:: LAMMPS + + units lj + dimension 2 + region box block 0 50 0 50 -0.5 0.5 + create_box 1 box + create_atoms 1 random 2000 13487 NULL overlap 1.0 maxtry 50 + pair_style lj/cut 2.5 + pair_coeff 1 1 1.0 1.0 2.5 + +to produce a system as shown in the image with 1520 particles (out of +2000 requested) that are moderately dense and which have no overlaps +sufficient to prevent the LJ pair_style from running properly (because +the overlap criterion = 1.0). The create_atoms command ran for 0.3 s +on a single CPU core. + +.. only:: html + + (Click on the image for a larger version) + +----- + The *units* keyword determines the meaning of the distance units used to specify the coordinates of the one particle created by the *single* -style. A *box* value selects standard distance units as defined by -the :doc:`units ` command, e.g. Angstroms for units = real or +style, or the overlap distance *Doverlap* by the *overlap* keyword. A +*box* value selects standard distance units as defined by the +:doc:`units ` command, e.g. Angstroms for units = real or metal. A *lattice* value means the distance units are in lattice spacings. @@ -315,9 +402,10 @@ assigned to created molecules in a similar fashion. Aside from their ID, atom type, and xyz position, other properties of created atoms are set to default values, depending on which quantities -are defined by the chosen :doc:`atom style `. See the :doc:`atom style ` command for more details. See the -:doc:`set ` and :doc:`velocity ` commands for info on how -to change these values. +are defined by the chosen :doc:`atom style `. See the +:doc:`atom style ` command for more details. See the +:doc:`set ` and :doc:`velocity ` commands for info on +how to change these values. * charge = 0.0 * dipole moment magnitude = 0.0 @@ -373,4 +461,4 @@ Default The default for the *basis* keyword is that all created atoms are assigned the argument *type* as their atom type (when single atoms are being created). The other defaults are *remap* = no, *rotate* = -random, and *units* = lattice. +random, *overlap* not checked, *maxtry* = 10, and *units* = lattice. diff --git a/doc/src/img/overlap.png b/doc/src/img/overlap.png new file mode 100644 index 0000000000..b10bf4b083 Binary files /dev/null and b/doc/src/img/overlap.png differ diff --git a/doc/utils/sphinx-config/false_positives.txt b/doc/utils/sphinx-config/false_positives.txt index bdebbb65a9..a1db9502fc 100644 --- a/doc/utils/sphinx-config/false_positives.txt +++ b/doc/utils/sphinx-config/false_positives.txt @@ -1326,6 +1326,7 @@ hiID Hijazi Hilger Hinestrosa +hipFFT histo histogrammed histogramming @@ -1951,6 +1952,7 @@ maxsize maxspecial maxSteps maxstrain +maxtry maxwell Maxwellian maxX @@ -2951,6 +2953,7 @@ rnage rng rNEMD ro +rocFFT Rochus Rockett rocksalt diff --git a/src/.gitignore b/src/.gitignore index f4db0fc27a..decadd20ff 100644 --- a/src/.gitignore +++ b/src/.gitignore @@ -627,6 +627,7 @@ /ewald.h /ewald_cg.cpp /ewald_cg.h +/ewald_const.h /ewald_dipole.cpp /ewald_dipole.h /ewald_dipole_spin.cpp diff --git a/src/KOKKOS/fft3d_kokkos.cpp b/src/KOKKOS/fft3d_kokkos.cpp index 737c2f20b5..acaed71bd9 100644 --- a/src/KOKKOS/fft3d_kokkos.cpp +++ b/src/KOKKOS/fft3d_kokkos.cpp @@ -46,13 +46,17 @@ FFT3dKokkos::FFT3dKokkos(LAMMPS *lmp, MPI_Comm comm, int nfast, int #if defined(FFT_MKL) if (ngpus > 0 && execution_space == Device) - lmp->error->all(FLERR,"Cannot use the MKL library with Kokkos CUDA on GPUs"); + lmp->error->all(FLERR,"Cannot use the MKL library with Kokkos on GPUs"); #elif defined(FFT_FFTW3) if (ngpus > 0 && execution_space == Device) - lmp->error->all(FLERR,"Cannot use the FFTW library with Kokkos CUDA on GPUs"); + lmp->error->all(FLERR,"Cannot use the FFTW library with Kokkos on GPUs"); #elif defined(FFT_CUFFT) if (ngpus > 0 && execution_space == Host) - lmp->error->all(FLERR,"Cannot use the cuFFT library with Kokkos CUDA on the host CPUs"); + lmp->error->all(FLERR,"Cannot use the cuFFT library with Kokkos on the host CPUs"); +#elif defined(FFT_HIPFFT) + if (ngpus > 0 && execution_space == Host) + lmp->error->all(FLERR,"Cannot use the hipFFT library with Kokkos on the host CPUs"); + #elif defined(FFT_KISSFFT) // The compiler can't statically determine the stack size needed for // recursive function calls in KISS FFT and the default per-thread @@ -145,7 +149,7 @@ public: KOKKOS_INLINE_FUNCTION void operator() (const int &i) const { -#if defined(FFT_FFTW3) || defined(FFT_CUFFT) +#if defined(FFT_FFTW3) || defined(FFT_CUFFT) || defined(FFT_HIPFFT) FFT_SCALAR* out_ptr = (FFT_SCALAR *)(d_out.data()+i); *(out_ptr++) *= norm; *(out_ptr++) *= norm; @@ -227,6 +231,8 @@ void FFT3dKokkos::fft_3d_kokkos(typename FFT_AT::t_FFT_DATA_1d d_in, FFTW_API(execute_dft)(plan->plan_fast_backward,(FFT_DATA*)d_data.data(),(FFT_DATA*)d_data.data()); #elif defined(FFT_CUFFT) cufftExec(plan->plan_fast,d_data.data(),d_data.data(),-flag); + #elif defined(FFT_HIPFFT) + hipfftExec(plan->plan_fast,d_data.data(),d_data.data(),-flag); #else typename FFT_AT::t_FFT_DATA_1d d_tmp = typename FFT_AT::t_FFT_DATA_1d(Kokkos::view_alloc("fft_3d:tmp",Kokkos::WithoutInitializing),d_data.extent(0)); @@ -271,6 +277,8 @@ void FFT3dKokkos::fft_3d_kokkos(typename FFT_AT::t_FFT_DATA_1d d_in, FFTW_API(execute_dft)(plan->plan_mid_backward,(FFT_DATA*)d_data.data(),(FFT_DATA*)d_data.data()); #elif defined(FFT_CUFFT) cufftExec(plan->plan_mid,d_data.data(),d_data.data(),-flag); + #elif defined(FFT_HIPFFT) + hipfftExec(plan->plan_mid,d_data.data(),d_data.data(),-flag); #else d_tmp = typename FFT_AT::t_FFT_DATA_1d(Kokkos::view_alloc("fft_3d:tmp",Kokkos::WithoutInitializing),d_data.extent(0)); if (flag == 1) @@ -313,6 +321,8 @@ void FFT3dKokkos::fft_3d_kokkos(typename FFT_AT::t_FFT_DATA_1d d_in, FFTW_API(execute_dft)(plan->plan_slow_backward,(FFT_DATA*)d_data.data(),(FFT_DATA*)d_data.data()); #elif defined(FFT_CUFFT) cufftExec(plan->plan_slow,d_data.data(),d_data.data(),-flag); + #elif defined(FFT_HIPFFT) + hipfftExec(plan->plan_slow,d_data.data(),d_data.data(),-flag); #else d_tmp = typename FFT_AT::t_FFT_DATA_1d(Kokkos::view_alloc("fft_3d:tmp",Kokkos::WithoutInitializing),d_data.extent(0)); if (flag == 1) @@ -699,6 +709,23 @@ struct fft_plan_3d_kokkos* FFT3dKokkos::fft_3d_create_pl &nslow,1,plan->length3, CUFFT_TYPE,plan->total3/plan->length3); +#elif defined(FFT_HIPFFT) + + hipfftPlanMany(&(plan->plan_fast), 1, &nfast, + &nfast,1,plan->length1, + &nfast,1,plan->length1, + HIPFFT_TYPE,plan->total1/plan->length1); + + hipfftPlanMany(&(plan->plan_mid), 1, &nmid, + &nmid,1,plan->length2, + &nmid,1,plan->length2, + HIPFFT_TYPE,plan->total2/plan->length2); + + hipfftPlanMany(&(plan->plan_slow), 1, &nslow, + &nslow,1,plan->length3, + &nslow,1,plan->length3, + HIPFFT_TYPE,plan->total3/plan->length3); + #else /* FFT_KISS */ kissfftKK = new KissFFTKokkos(); @@ -863,6 +890,10 @@ void FFT3dKokkos::fft_3d_1d_only_kokkos(typename FFT_AT::t_FFT_DATA_ cufftExec(plan->plan_fast,d_data.data(),d_data.data(),-flag); cufftExec(plan->plan_mid,d_data.data(),d_data.data(),-flag); cufftExec(plan->plan_slow,d_data.data(),d_data.data(),-flag); +#elif defined(FFT_HIPFFT) + hipfftExec(plan->plan_fast,d_data.data(),d_data.data(),-flag); + hipfftExec(plan->plan_mid,d_data.data(),d_data.data(),-flag); + hipfftExec(plan->plan_slow,d_data.data(),d_data.data(),-flag); #else kiss_fft_functor f; typename FFT_AT::t_FFT_DATA_1d d_tmp = diff --git a/src/KOKKOS/fft3d_kokkos.h b/src/KOKKOS/fft3d_kokkos.h index 12f0f787d1..f4bc3fe58a 100644 --- a/src/KOKKOS/fft3d_kokkos.h +++ b/src/KOKKOS/fft3d_kokkos.h @@ -60,6 +60,10 @@ struct fft_plan_3d_kokkos { cufftHandle plan_fast; cufftHandle plan_mid; cufftHandle plan_slow; +#elif defined(FFT_HIPFFT) + hipfftHandle plan_fast; + hipfftHandle plan_mid; + hipfftHandle plan_slow; #else kiss_fft_state_kokkos cfg_fast_forward; kiss_fft_state_kokkos cfg_fast_backward; diff --git a/src/KOKKOS/fftdata_kokkos.h b/src/KOKKOS/fftdata_kokkos.h index 5ff9a669aa..388ecbb963 100644 --- a/src/KOKKOS/fftdata_kokkos.h +++ b/src/KOKKOS/fftdata_kokkos.h @@ -36,8 +36,8 @@ #endif -// with KOKKOS in CUDA mode we can only have -// CUFFT or KISSFFT, thus undefine all other +// with KOKKOS in CUDA or HIP mode we can only have +// CUFFT/HIPFFT or KISSFFT, thus undefine all other // FFTs here, since they may be valid in fft3d.cpp #ifdef KOKKOS_ENABLE_CUDA @@ -53,10 +53,26 @@ # if !defined(FFT_CUFFT) && !defined(FFT_KISSFFT) # define FFT_KISSFFT # endif +#elif defined(KOKKOS_ENABLE_HIP) +# if defined(FFT_FFTW) +# undef FFT_FFTW +# endif +# if defined(FFT_FFTW3) +# undef FFT_FFTW3 +# endif +# if defined(FFT_MKL) +# undef FFT_MKL +# endif +# if !defined(FFT_HIPFFT) && !defined(FFT_KISSFFT) +# define FFT_KISSFFT +# endif #else # if defined(FFT_CUFFT) # error "Must enable CUDA with KOKKOS to use -DFFT_CUFFT" # endif +# if defined(FFT_HIPFFT) +# error "Must enable HIP with KOKKOS to use -DFFT_HIPFFT" +# endif // if user set FFTW, it means FFTW3 # ifdef FFT_FFTW # define FFT_FFTW3 @@ -97,6 +113,17 @@ #define CUFFT_TYPE CUFFT_Z2Z typedef cufftDoubleComplex FFT_DATA; #endif +#elif defined(FFT_HIPFFT) + #include "hipfft.h" + #if defined(FFT_SINGLE) + #define hipfftExec hipfftExecC2C + #define HIPFFT_TYPE HIPFFT_C2C + typedef hipfftComplex FFT_DATA; + #else + #define hipfftExec hipfftExecZ2Z + #define HIPFFT_TYPE HIPFFT_Z2Z + typedef hipfftDoubleComplex FFT_DATA; + #endif #else #if defined(FFT_SINGLE) #define kiss_fft_scalar float diff --git a/src/MAKE/MACHINES/Makefile.crusher_kokkos b/src/MAKE/MACHINES/Makefile.crusher_kokkos index 7dc1447d4e..5744f2d9bf 100644 --- a/src/MAKE/MACHINES/Makefile.crusher_kokkos +++ b/src/MAKE/MACHINES/Makefile.crusher_kokkos @@ -1,4 +1,4 @@ -# crusher_kokkos = KOKKOS/HIP, AMD MI250X GPU and AMD EPYC 7A53 "Optimized 3rd Gen EPYC" CPU, Cray MPICH, hipcc compiler +# crusher_kokkos = KOKKOS/HIP, AMD MI250X GPU and AMD EPYC 7A53 "Optimized 3rd Gen EPYC" CPU, Cray MPICH, hipcc compiler, hipFFT SHELL = /bin/sh @@ -54,9 +54,12 @@ MPI_LIB = -L${MPICH_DIR}/lib -lmpi -L${CRAY_MPICH_ROOTDIR}/gtl/lib -lmpi_gtl_hsa # PATH = path for FFT library # LIB = name of FFT library -FFT_INC = +MY_HIP_EXE = $(shell which hipcc) +MY_HIP_PATH = $(dir ${MY_HIP_EXE}) + +FFT_INC = -DFFT_HIPFFT FFT_PATH = -FFT_LIB = +FFT_LIB = -L${MY_HIP_PATH}../lib -lhipfft # JPEG and/or PNG library # see discussion in Section 3.5.4 of manual diff --git a/src/MAKE/MACHINES/Makefile.spock_kokkos b/src/MAKE/MACHINES/Makefile.spock_kokkos index a85ebb3039..5771184287 100644 --- a/src/MAKE/MACHINES/Makefile.spock_kokkos +++ b/src/MAKE/MACHINES/Makefile.spock_kokkos @@ -1,4 +1,4 @@ -# spock_kokkos = KOKKOS/HIP, AMD MI100 GPU and AMD EPYC 7662 "Rome" CPU, Cray MPICH, hipcc compiler +# spock_kokkos = KOKKOS/HIP, AMD MI100 GPU and AMD EPYC 7662 "Rome" CPU, Cray MPICH, hipcc compiler, hipFFT SHELL = /bin/sh @@ -54,9 +54,12 @@ MPI_LIB = -L${MPICH_DIR}/lib -lmpi -L${CRAY_MPICH_ROOTDIR}/gtl/lib -lmpi_gtl_hsa # PATH = path for FFT library # LIB = name of FFT library -FFT_INC = +MY_HIP_EXE = $(shell which hipcc) +MY_HIP_PATH = $(dir ${MY_HIP_EXE}) + +FFT_INC = -DFFT_HIPFFT FFT_PATH = -FFT_LIB = +FFT_LIB = -L${MY_HIP_PATH}../lib -lhipfft # JPEG and/or PNG library # see discussion in Section 3.5.4 of manual diff --git a/src/create_atoms.cpp b/src/create_atoms.cpp index 8690b7c28e..191f9cb407 100644 --- a/src/create_atoms.cpp +++ b/src/create_atoms.cpp @@ -13,6 +13,7 @@ /* ---------------------------------------------------------------------- Contributing author (ratio and subset) : Jake Gissinger (U Colorado) + Contributing author (maxtry & overlap) : Eugen Rozic (IRB, Zagreb) ------------------------------------------------------------------------- */ #include "create_atoms.h" @@ -44,6 +45,7 @@ using namespace MathConst; #define BIG 1.0e30 #define EPSILON 1.0e-6 #define LB_FACTOR 1.1 +#define DEFAULT_MAXTRY 1000 enum { BOX, REGION, SINGLE, RANDOM }; enum { ATOM, MOLECULE }; @@ -95,7 +97,6 @@ void CreateAtoms::command(int narg, char **arg) region->init(); region->prematch(); iarg = 3; - ; } else if (strcmp(arg[1], "single") == 0) { style = SINGLE; if (narg < 5) error->all(FLERR, "Illegal create_atoms command"); @@ -107,7 +108,9 @@ void CreateAtoms::command(int narg, char **arg) style = RANDOM; if (narg < 5) error->all(FLERR, "Illegal create_atoms command"); nrandom = utils::inumeric(FLERR, arg[2], false, lmp); + if (nrandom < 0) error->all(FLERR, "Illegal create_atoms command"); seed = utils::inumeric(FLERR, arg[3], false, lmp); + if (seed <= 0) error->all(FLERR, "Illegal create_atoms command"); if (strcmp(arg[4], "NULL") == 0) region = nullptr; else { @@ -126,11 +129,15 @@ void CreateAtoms::command(int narg, char **arg) remapflag = 0; mode = ATOM; int molseed; + ranmol = nullptr; varflag = 0; vstr = xstr = ystr = zstr = nullptr; - quatone[0] = quatone[1] = quatone[2] = 0.0; + quat_user = 0; + quatone[0] = quatone[1] = quatone[2] = quatone[3] = 0.0; subsetflag = NONE; int subsetseed; + overlapflag = 0; + maxtry = DEFAULT_MAXTRY; nbasis = domain->lattice->nbasis; basistype = new int[nbasis]; @@ -172,6 +179,8 @@ void CreateAtoms::command(int narg, char **arg) error->all(FLERR, "Illegal create_atoms command"); iarg += 2; } else if (strcmp(arg[iarg], "var") == 0) { + if (style == SINGLE) error->all(FLERR, "Illegal create_atoms command: " + "can't combine 'var' keyword with 'single' style!"); if (iarg + 2 > narg) error->all(FLERR, "Illegal create_atoms command"); delete[] vstr; vstr = utils::strdup(arg[iarg + 1]); @@ -193,10 +202,10 @@ void CreateAtoms::command(int narg, char **arg) iarg += 3; } else if (strcmp(arg[iarg], "rotate") == 0) { if (iarg + 5 > narg) error->all(FLERR, "Illegal create_atoms command"); + quat_user = 1; double thetaone; double axisone[3]; thetaone = utils::numeric(FLERR, arg[iarg + 1], false, lmp) / 180.0 * MY_PI; - ; axisone[0] = utils::numeric(FLERR, arg[iarg + 2], false, lmp); axisone[1] = utils::numeric(FLERR, arg[iarg + 3], false, lmp); axisone[2] = utils::numeric(FLERR, arg[iarg + 4], false, lmp); @@ -222,38 +231,46 @@ void CreateAtoms::command(int narg, char **arg) subsetseed = utils::inumeric(FLERR, arg[iarg + 2], false, lmp); if (nsubset <= 0 || subsetseed <= 0) error->all(FLERR, "Illegal create_atoms command"); iarg += 3; + } else if (strcmp(arg[iarg], "overlap") == 0) { + if (style != RANDOM) + error->all(FLERR, "Create_atoms overlap can only be used with random style"); + if (iarg + 2 > narg) error->all(FLERR, "Illegal create_atoms command"); + overlap = utils::numeric(FLERR, arg[iarg + 1], false, lmp); + if (overlap <= 0) error->all(FLERR, "Illegal create_atoms command"); + overlapflag = 1; + iarg += 2; + } else if (strcmp(arg[iarg], "maxtry") == 0) { + if (style != RANDOM) + error->all(FLERR, "Create_atoms maxtry can only be used with random style"); + if (iarg + 2 > narg) error->all(FLERR, "Illegal create_atoms command"); + maxtry = utils::inumeric(FLERR, arg[iarg + 1], false, lmp); + if (maxtry <= 0) error->all(FLERR,"Illegal create_atoms command"); + iarg += 2; } else error->all(FLERR, "Illegal create_atoms command"); } - // error checks + // error checks and further setup for mode = MOLECULE - if (mode == ATOM && (ntype <= 0 || ntype > atom->ntypes)) - error->all(FLERR, "Invalid atom type in create_atoms command"); - - if (style == RANDOM) { - if (nrandom < 0) error->all(FLERR, "Illegal create_atoms command"); - if (seed <= 0) error->all(FLERR, "Illegal create_atoms command"); - } - - // error check and further setup for mode = MOLECULE - - ranmol = nullptr; - if (mode == MOLECULE) { - if (onemol->xflag == 0) error->all(FLERR, "Create_atoms molecule must have coordinates"); - if (onemol->typeflag == 0) error->all(FLERR, "Create_atoms molecule must have atom types"); + if (mode == ATOM) { + if (ntype <= 0 || ntype > atom->ntypes) + error->all(FLERR, "Invalid atom type in create_atoms command"); + } else if (mode == MOLECULE) { + if (onemol->xflag == 0) + error->all(FLERR, "Create_atoms molecule must have coordinates"); + if (onemol->typeflag == 0) + error->all(FLERR, "Create_atoms molecule must have atom types"); if (ntype + onemol->ntypes <= 0 || ntype + onemol->ntypes > atom->ntypes) error->all(FLERR, "Invalid atom type in create_atoms mol command"); if (onemol->tag_require && !atom->tag_enable) error->all(FLERR, "Create_atoms molecule has atom IDs, but system does not"); + onemol->check_attributes(0); - // create_atoms uses geoemetric center of molecule for insertion - - onemol->compute_center(); - + // use geometric center of molecule for insertion // molecule random number generator, different for each proc + onemol->compute_center(); ranmol = new RanMars(lmp, molseed + comm->me); } @@ -305,6 +322,7 @@ void CreateAtoms::command(int narg, char **arg) xone[0] *= domain->lattice->xlattice; xone[1] *= domain->lattice->ylattice; xone[2] *= domain->lattice->zlattice; + overlap *= domain->lattice->xlattice; } // set bounds for my proc in sublo[3] & subhi[3] @@ -376,7 +394,7 @@ void CreateAtoms::command(int narg, char **arg) } } - // Record wall time for atom creation + // record wall time for atom creation MPI_Barrier(world); double time1 = platform::walltime(); @@ -630,12 +648,11 @@ void CreateAtoms::add_single() if (coord[0] >= sublo[0] && coord[0] < subhi[0] && coord[1] >= sublo[1] && coord[1] < subhi[1] && coord[2] >= sublo[2] && coord[2] < subhi[2]) { - if (mode == ATOM) + if (mode == ATOM) { atom->avec->create_atom(ntype, xone); - else if (quatone[0] == 0.0 && quatone[1] == 0.0 && quatone[2] == 0.0) + } else { add_molecule(xone); - else - add_molecule(xone, quatone); + } } } @@ -646,9 +663,16 @@ void CreateAtoms::add_single() void CreateAtoms::add_random() { double xlo, ylo, zlo, xhi, yhi, zhi, zmid; + double delx, dely, delz, distsq, odistsq; double lamda[3], *coord; double *boxlo, *boxhi; + if (overlapflag) { + double odist = overlap; + if (mode == MOLECULE) odist += onemol->molradius; + odistsq = odist*odist; + } + // random number generator, same for all procs // warm up the generator 30x to avoid correlations in first-particle // positions if runs are repeated with consecutive seeds @@ -689,49 +713,101 @@ void CreateAtoms::add_random() zhi = MIN(zhi, region->extent_zhi); } - // generate random positions for each new atom/molecule within bounding box - // iterate until atom is within region, variable, and triclinic simulation box - // if final atom position is in my subbox, create it - if (xlo > xhi || ylo > yhi || zlo > zhi) error->all(FLERR, "No overlap of box and region for create_atoms"); - int valid; + // insert Nrandom new atom/molecule into simulation box + + int ntry,success; + int ninsert = 0; + for (int i = 0; i < nrandom; i++) { - while (true) { + + // attempt to insert an atom/molecule up to maxtry times + // criteria for insertion: region, variable, triclinic box, overlap + + success = 0; + ntry = 0; + + while (ntry < maxtry) { + ntry++; + xone[0] = xlo + random->uniform() * (xhi - xlo); xone[1] = ylo + random->uniform() * (yhi - ylo); xone[2] = zlo + random->uniform() * (zhi - zlo); if (domain->dimension == 2) xone[2] = zmid; - valid = 1; - if (region && (region->match(xone[0], xone[1], xone[2]) == 0)) valid = 0; - if (varflag && vartest(xone) == 0) valid = 0; + if (region && (region->match(xone[0], xone[1], xone[2]) == 0)) continue; + if (varflag && vartest(xone) == 0) continue; + if (triclinic) { domain->x2lamda(xone, lamda); coord = lamda; if (coord[0] < boxlo[0] || coord[0] >= boxhi[0] || coord[1] < boxlo[1] || coord[1] >= boxhi[1] || coord[2] < boxlo[2] || coord[2] >= boxhi[2]) - valid = 0; - } else + continue; + } else { coord = xone; + } - if (valid) break; + // check for overlap of new atom/mol with all other atoms + // including prior insertions + // minimum_image() needed to account for distances across PBC + // new molecule only checks its center pt against others + // odistsq is expanded for mode=MOLECULE to account for molecule size + + if (overlapflag) { + double **x = atom->x; + int nlocal = atom->nlocal; + + int reject = 0; + for (int i = 0; i < nlocal; i++) { + delx = xone[0] - x[i][0]; + dely = xone[1] - x[i][1]; + delz = xone[2] - x[i][2]; + domain->minimum_image(delx, dely, delz); + distsq = delx*delx + dely*dely + delz*delz; + if (distsq < odistsq) { + reject = 1; + break; + } + } + int reject_any; + MPI_Allreduce(&reject, &reject_any, 1, MPI_INT, MPI_MAX, world); + if (reject_any) continue; + } + + // all tests passed + + success = 1; + break; } + // insertion failed, advance to next atom/molecule + + if (!success) continue; + + // insertion criteria were met + // if final atom position is in my subbox, create it // if triclinic, coord is now in lamda units + ninsert++; + if (coord[0] >= sublo[0] && coord[0] < subhi[0] && coord[1] >= sublo[1] && coord[1] < subhi[1] && coord[2] >= sublo[2] && coord[2] < subhi[2]) { - if (mode == ATOM) + if (mode == ATOM) { atom->avec->create_atom(ntype, xone); - else if (quatone[0] == 0 && quatone[1] == 0 && quatone[2] == 0) + } else { add_molecule(xone); - else - add_molecule(xone, quatone); + } } } + // warn if did not successfully insert Nrandom atoms/molecules + + if (ninsert < nrandom && comm->me == 0) + error->warning(FLERR, "Only inserted {} particles out of {}", ninsert, nrandom); + // clean-up delete random; @@ -785,6 +861,7 @@ void CreateAtoms::add_lattice() xmax = ymax = zmax = -BIG; // convert to lattice coordinates and set bounding box + domain->lattice->bbox(1, bboxlo[0], bboxlo[1], bboxlo[2], xmin, ymin, zmin, xmax, ymax, zmax); domain->lattice->bbox(1, bboxhi[0], bboxlo[1], bboxlo[2], xmin, ymin, zmin, xmax, ymax, zmax); domain->lattice->bbox(1, bboxlo[0], bboxhi[1], bboxlo[2], xmin, ymin, zmin, xmax, ymax, zmax); @@ -905,7 +982,8 @@ void CreateAtoms::loop_lattice(int action) // if variable test specified, eval variable - if (varflag && vartest(x) == 0) continue; + if (varflag && vartest(x) == 0) + continue; // test if atom/molecule position is in my subbox @@ -924,21 +1002,19 @@ void CreateAtoms::loop_lattice(int action) // add = add an atom or entire molecule to my list of atoms if (action == INSERT) { - if (mode == ATOM) + if (mode == ATOM) { atom->avec->create_atom(basistype[m], x); - else if (quatone[0] == 0 && quatone[1] == 0 && quatone[2] == 0) + } else { add_molecule(x); - else - add_molecule(x, quatone); + } } else if (action == COUNT) { if (nlatt == MAXSMALLINT) nlatt_overflow = 1; } else if (action == INSERT_SELECTED && flag[nlatt]) { - if (mode == ATOM) + if (mode == ATOM) { atom->avec->create_atom(basistype[m], x); - else if (quatone[0] == 0 && quatone[1] == 0 && quatone[2] == 0) + } else { add_molecule(x); - else - add_molecule(x, quatone); + } } nlatt++; @@ -949,43 +1025,42 @@ void CreateAtoms::loop_lattice(int action) } /* ---------------------------------------------------------------------- - add a randomly rotated molecule with its center at center - if quat_user set, perform requested rotation + add a molecule with its center at center ------------------------------------------------------------------------- */ -void CreateAtoms::add_molecule(double *center, double *quat_user) +void CreateAtoms::add_molecule(double *center) { - int n; - double r[3], rotmat[3][3], quat[4], xnew[3]; + double r[3], rotmat[3][3]; - if (quat_user) { - quat[0] = quat_user[0]; - quat[1] = quat_user[1]; - quat[2] = quat_user[2]; - quat[3] = quat_user[3]; - } else { + // use quatone as-is if user set it + // else generate random quaternion in quatone + + if (!quat_user) { if (domain->dimension == 3) { r[0] = ranmol->uniform() - 0.5; r[1] = ranmol->uniform() - 0.5; r[2] = ranmol->uniform() - 0.5; + MathExtra::norm3(r); } else { r[0] = r[1] = 0.0; r[2] = 1.0; } - MathExtra::norm3(r); double theta = ranmol->uniform() * MY_2PI; - MathExtra::axisangle_to_quat(r, theta, quat); + MathExtra::axisangle_to_quat(r, theta, quatone); } - MathExtra::quat_to_mat(quat, rotmat); - onemol->quat_external = quat; + MathExtra::quat_to_mat(quatone, rotmat); // create atoms in molecule with atom ID = 0 and mol ID = 0 - // reset in caller after all molecules created by all procs + // IDs are reset in caller after all molecules created by all procs // pass add_molecule_atom an offset of 0 since don't know // max tag of atoms in previous molecules at this point + // onemol->quat_external is used by atom->add_moleclue_atom() - int natoms = onemol->natoms; + onemol->quat_external = quatone; + + int n, natoms = onemol->natoms; + double xnew[3]; for (int m = 0; m < natoms; m++) { MathExtra::matvec(rotmat, onemol->dx[m], xnew); MathExtra::add3(xnew, center, xnew); diff --git a/src/create_atoms.h b/src/create_atoms.h index aa21504896..ba2ce7cbed 100644 --- a/src/create_atoms.h +++ b/src/create_atoms.h @@ -32,6 +32,10 @@ class CreateAtoms : public Command { private: int ntype, style, mode, nbasis, nrandom, seed; int remapflag; + int maxtry; + int quat_user; + int overlapflag; + double overlap; int subsetflag; bigint nsubset; double subsetfrac; @@ -62,7 +66,7 @@ class CreateAtoms : public Command { void add_random(); void add_lattice(); void loop_lattice(int); - void add_molecule(double *, double * = nullptr); + void add_molecule(double *); int vartest(double *); // evaluate a variable with new atom position }; diff --git a/src/lmpfftsettings.h b/src/lmpfftsettings.h index b55e4acb77..7c33d2b26c 100644 --- a/src/lmpfftsettings.h +++ b/src/lmpfftsettings.h @@ -32,6 +32,8 @@ #define LMP_FFT_LIB "MKL FFT" #elif defined(FFT_CUFFT) #define LMP_FFT_LIB "cuFFT" +#elif defined(FFT_HIPFFT) +#define LMP_FFT_LIB "hipFFT" #else #define LMP_FFT_LIB "KISS FFT" #endif