diff --git a/cmake/Modules/Packages/KOKKOS.cmake b/cmake/Modules/Packages/KOKKOS.cmake index a0b872ba85..ce55c83b08 100644 --- a/cmake/Modules/Packages/KOKKOS.cmake +++ b/cmake/Modules/Packages/KOKKOS.cmake @@ -16,11 +16,6 @@ endif() if(Kokkos_ENABLE_OPENMP) if(NOT BUILD_OMP) message(FATAL_ERROR "Must enable BUILD_OMP with Kokkos_ENABLE_OPENMP") - else() - # NVHPC/(AMD)Clang does not seem to provide a detectable OpenMP version, but is far beyond version 3.1 - if((OpenMP_CXX_VERSION VERSION_LESS 3.1) AND NOT ((CMAKE_CXX_COMPILER_ID STREQUAL "NVHPC") OR (CMAKE_CXX_COMPILER_ID STREQUAL "Clang"))) - message(FATAL_ERROR "Compiler must support OpenMP 3.1 or later with Kokkos_ENABLE_OPENMP") - endif() endif() endif() ######################################################################## diff --git a/doc/src/Commands_bond.rst b/doc/src/Commands_bond.rst index aaf706b5df..ef36b6b7c4 100644 --- a/doc/src/Commands_bond.rst +++ b/doc/src/Commands_bond.rst @@ -124,7 +124,7 @@ OPT. * * * :doc:`charmm (iko) ` - * :doc:`charmmfsw ` + * :doc:`charmmfsw (k) ` * :doc:`class2 (ko) ` * :doc:`cosine/shift/exp (o) ` * :doc:`fourier (io) ` diff --git a/doc/src/Commands_pair.rst b/doc/src/Commands_pair.rst index e7761e7bee..9f2bdbce79 100644 --- a/doc/src/Commands_pair.rst +++ b/doc/src/Commands_pair.rst @@ -146,7 +146,7 @@ OPT. 
* :doc:`lj/charmm/coul/long/soft (o) ` * :doc:`lj/charmm/coul/msm (o) ` * :doc:`lj/charmmfsw/coul/charmmfsh ` - * :doc:`lj/charmmfsw/coul/long ` + * :doc:`lj/charmmfsw/coul/long (k) ` * :doc:`lj/class2 (gko) ` * :doc:`lj/class2/coul/cut (ko) ` * :doc:`lj/class2/coul/cut/soft ` diff --git a/doc/src/Developer_updating.rst b/doc/src/Developer_updating.rst index 36c6974b30..cd61eaa5a1 100644 --- a/doc/src/Developer_updating.rst +++ b/doc/src/Developer_updating.rst @@ -20,6 +20,7 @@ Available topics in mostly chronological order are: - `Use ev_init() to initialize variables derived from eflag and vflag`_ - `Use utils::numeric() functions instead of force->numeric()`_ - `Use utils::open_potential() function to open potential files`_ +- `Use symbolic Atom and AtomVec constants instead of numerical values`_ - `Simplify customized error messages`_ - `Use of "override" instead of "virtual"`_ - `Simplified and more compact neighbor list requests`_ @@ -196,6 +197,71 @@ New: fp = utils::open_potential(filename, lmp); +Use symbolic Atom and AtomVec constants instead of numerical values +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. versionchanged:: 18Sep2020 + +Properties in LAMMPS that were represented by integer values (0, 1, +2, 3) to indicate settings in the ``Atom`` and ``AtomVec`` classes (or +classes derived from them) have been converted +to use scoped enumerators instead. + +.. list-table:: + :header-rows: 1 + :widths: auto + + * - Symbolic Constant + - Value + - Symbolic Constant + - Value + * - Atom::GROW + - 0 + - Atom::MAP_NONE + - 0 + * - Atom::RESTART + - 1 + - Atom::MAP_ARRAY + - 1 + * - Atom::BORDER + - 2 + - Atom::MAP_HASH + - 2 + * - Atom::ATOMIC + - 0 + - Atom::MAP_YES + - 3 + * - Atom::MOLECULAR + - 1 + - AtomVec::PER_ATOM + - 0 + * - Atom::TEMPLATE + - 2 + - AtomVec::PER_TYPE + - 1 + +Old: + +..
code-block:: c++ + + molecular = 0; + mass_type = 1; + if (atom->molecular == 2) + if (atom->map_style == 2) + atom->add_callback(0); + atom->delete_callback(id,1); + +New: + +.. code-block:: c++ + + molecular = Atom::ATOMIC; + mass_type = AtomVec::PER_TYPE; + if (atom->molecular == Atom::TEMPLATE) + if (atom->map_style == Atom::MAP_HASH) + atom->add_callback(Atom::GROW); + atom->delete_callback(id,Atom::RESTART); + Simplify customized error messages ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/doc/src/angle_charmm.rst b/doc/src/angle_charmm.rst index 425ed7e4f1..655b860a28 100644 --- a/doc/src/angle_charmm.rst +++ b/doc/src/angle_charmm.rst @@ -70,7 +70,9 @@ for more info. Related commands """""""""""""""" -:doc:`angle_coeff ` +:doc:`angle_coeff `, :doc:`pair_style lj/charmm variants `, +:doc:`dihedral_style charmm `, +:doc:`dihedral_style charmmfsw `, :doc:`fix cmap ` Default """"""" diff --git a/doc/src/angle_lepton.rst b/doc/src/angle_lepton.rst index 20fa5b1fee..22873f5765 100644 --- a/doc/src/angle_lepton.rst +++ b/doc/src/angle_lepton.rst @@ -11,7 +11,16 @@ Syntax .. code-block:: LAMMPS - angle_style lepton + angle_style style args + +* style = *lepton* +* args = optional arguments + +.. parsed-literal:: + + args = *auto_offset* or *no_offset* + *auto_offset* = offset the potential energy so that the value at theta0 is 0.0 (default) + *no_offset* = do not offset the potential energy Examples """""""" @@ -19,6 +28,7 @@ Examples .. code-block:: LAMMPS angle_style lepton + angle_style lepton no_offset angle_coeff 1 120.0 "k*theta^2; k=250.0" angle_coeff 2 90.0 "k2*theta^2 + k3*theta^3 + k4*theta^4; k2=300.0; k3=-100.0; k4=50.0" @@ -41,6 +51,13 @@ angle coefficient. For example `"200.0*theta^2"` represents a U_{angle,i} = K (\theta_i - \theta_0)^2 = K \theta^2 \qquad \theta = \theta_i - \theta_0 +.. versionchanged:: TBD + +By default the potential energy U is shifted so that the value U is 0.0 +for :math:`\theta_i = \theta_0`.
This is equivalent to using the optional keyword +*auto_offset*. When using the keyword *no_offset* instead, the +potential energy is not shifted. + The `Lepton library `_, that the *lepton* angle style interfaces with, evaluates this expression string at run time to compute the pairwise energy. It also creates an diff --git a/doc/src/bond_bpm_rotational.rst b/doc/src/bond_bpm_rotational.rst index 7459d491d6..6734bd7bfe 100644 --- a/doc/src/bond_bpm_rotational.rst +++ b/doc/src/bond_bpm_rotational.rst @@ -147,8 +147,8 @@ By default, pair forces are not calculated between bonded particles. Pair forces can alternatively be overlaid on top of bond forces by setting the *overlay/pair* keyword to *yes*. These settings require specific :doc:`special_bonds ` settings described in the -restrictions. Further details can be found in the :doc:`how to -` page on BPMs. +restrictions. Further details can be found in the :doc:`how to ` +page on BPMs. .. versionadded:: 28Mar2023 diff --git a/doc/src/bond_bpm_spring.rst b/doc/src/bond_bpm_spring.rst index 04ff4d5991..a03c832249 100644 --- a/doc/src/bond_bpm_spring.rst +++ b/doc/src/bond_bpm_spring.rst @@ -113,8 +113,8 @@ By default, pair forces are not calculated between bonded particles. Pair forces can alternatively be overlaid on top of bond forces by setting the *overlay/pair* keyword to *yes*. These settings require specific :doc:`special_bonds ` settings described in the -restrictions. Further details can be found in the :doc:`how to -` page on BPMs. +restrictions. Further details can be found in the :doc:`how to ` +page on BPMs. .. versionadded:: 28Mar2023 diff --git a/doc/src/bond_lepton.rst b/doc/src/bond_lepton.rst index adfd30627d..9429535af8 100644 --- a/doc/src/bond_lepton.rst +++ b/doc/src/bond_lepton.rst @@ -11,7 +11,16 @@ Syntax .. code-block:: LAMMPS - bond_style lepton + bond_style style args + +* style = *lepton* +* args = optional arguments + +.. 
parsed-literal:: + + args = *auto_offset* or *no_offset* + *auto_offset* = offset the potential energy so that the value at r0 is 0.0 (default) + *no_offset* = do not offset the potential energy Examples """""""" @@ -19,6 +28,7 @@ Examples .. code-block:: LAMMPS bond_style lepton + bond_style lepton no_offset bond_coeff 1 1.5 "k*r^2; k=250.0" bond_coeff 2 1.1 "k2*r^2 + k3*r^3 + k4*r^4; k2=300.0; k3=-100.0; k4=50.0" @@ -40,6 +50,13 @@ constant *K* of 200.0 energy units: U_{bond,i} = K (r_i - r_0)^2 = K r^2 \qquad r = r_i - r_0 +.. versionchanged:: TBD + +By default the potential energy U is shifted so that the value U is 0.0 +for :math:`r_i = r_0`. This is equivalent to using the optional keyword +*auto_offset*. When using the keyword *no_offset* instead, the +potential energy is not shifted. + The `Lepton library `_, that the *lepton* bond style interfaces with, evaluates this expression string at run time to compute the pairwise energy. It also creates an analytical diff --git a/doc/src/dihedral_charmm.rst b/doc/src/dihedral_charmm.rst index cc792693a2..a5652bc74e 100644 --- a/doc/src/dihedral_charmm.rst +++ b/doc/src/dihedral_charmm.rst @@ -3,6 +3,7 @@ .. index:: dihedral_style charmm/kk .. index:: dihedral_style charmm/omp .. index:: dihedral_style charmmfsw +.. index:: dihedral_style charmmfsw/kk dihedral_style charmm command ============================= @@ -12,6 +13,8 @@ Accelerator Variants: *charmm/intel*, *charmm/kk*, *charmm/omp* dihedral_style charmmfsw command ================================ +Accelerator Variants: *charmmfsw/kk* + Syntax """""" @@ -144,7 +147,9 @@ for more info. Related commands """""""""""""""" -:doc:`dihedral_coeff ` +:doc:`dihedral_coeff `, +:doc:`pair_style lj/charmm variants `, +:doc:`angle_style charmm `, :doc:`fix cmap ` Default """"""" diff --git a/doc/src/fix_qeq.rst b/doc/src/fix_qeq.rst index bace7af0ca..f353e9a998 100644 --- a/doc/src/fix_qeq.rst +++ b/doc/src/fix_qeq.rst @@ -232,8 +232,6 @@ These fixes are part of the QEQ package.
They are only enabled if LAMMPS was built with that package. See the :doc:`Build package ` page for more info. -These qeq fixes are not compatible with the GPU and USER-INTEL packages. - These qeq fixes will ignore electric field contributions from :doc:`fix efield `. diff --git a/doc/src/molecule.rst b/doc/src/molecule.rst index b930a9fc65..e1770ced2a 100644 --- a/doc/src/molecule.rst +++ b/doc/src/molecule.rst @@ -126,14 +126,50 @@ molecule (header keyword = inertia). Format of a molecule file """"""""""""""""""""""""" -The format of an individual molecule file is similar but -(not identical) to the data file read by the :doc:`read_data ` -commands, and is as follows. +The format of an individual molecule file looks similar but is +different than that of a data file read by the :doc:`read_data ` +commands. Here is a simple example for a TIP3P water molecule: + +.. code-block:: + + # Water molecule. TIP3P geometry + # header section: + 3 atoms + 2 bonds + 1 angles + + # body section: + Coords + + 1 0.00000 -0.06556 0.00000 + 2 0.75695 0.52032 0.00000 + 3 -0.75695 0.52032 0.00000 + + Types + + 1 1 # O + 2 2 # H + 3 2 # H + + Charges + + 1 -0.834 + 2 0.417 + 3 0.417 + + Bonds + + 1 1 1 2 + 2 1 1 3 + + Angles + + 1 1 2 1 3 A molecule file has a header and a body. The header appears first. The -first line of the header and thus of the molecule file is *always* skipped; -it typically contains a description of the file or a comment from the software -that created the file. +first line of the header and thus of the molecule file is *always* +skipped; it typically contains a description of the file or a comment +from the software that created the file. Then lines are read one line at a time. Lines can have a trailing comment starting with '#' that is ignored. 
There *must* be at least one @@ -158,25 +194,62 @@ appear if the value(s) are different than the default, except when defining a *body* particle, which requires setting the number of *atoms* to 1, and setting the *inertia* in a specific section (see below). -* N *atoms* = # of atoms N in molecule, default = 0 -* Nb *bonds* = # of bonds Nb in molecule, default = 0 -* Na *angles* = # of angles Na in molecule, default = 0 -* Nd *dihedrals* = # of dihedrals Nd in molecule, default = 0 -* Ni *impropers* = # of impropers Ni in molecule, default = 0 -* Nf *fragments* = # of fragments Nf in molecule, default = 0 -* Ninteger Ndouble *body* = # of integer and floating-point values - in body particle, default = 0 -* Mtotal *mass* = total mass of molecule -* Xc Yc Zc *com* = coordinates of center-of-mass of molecule -* Ixx Iyy Izz Ixy Ixz Iyz *inertia* = 6 components of inertia tensor of molecule + .. list-table:: + :header-rows: 1 + :widths: auto -For *mass*, *com*, and *inertia*, the default is for LAMMPS to -calculate this quantity itself if needed, assuming the molecules -consist of a set of point particles or finite-size particles (with a -non-zero diameter) that do not overlap. If finite-size particles in -the molecule do overlap, LAMMPS will not account for the overlap -effects when calculating any of these 3 quantities, so you should -pre-compute them yourself and list the values in the file. 
+ * - Number(s) + - Keyword + - Meaning + - Default Value + * - N + - atoms + - # of atoms N in molecule + - 0 + * - Nb + - bonds + - # of bonds Nb in molecule + - 0 + * - Na + - angles + - # of angles Na in molecule + - 0 + * - Nd + - dihedrals + - # of dihedrals Nd in molecule + - 0 + * - Ni + - impropers + - # of impropers Ni in molecule + - 0 + * - Nf + - fragments + - # of fragments Nf in molecule + - 0 + * - Ninteger Ndouble + - body + - # of integer and floating-point values in body particle + - 0 + * - Mtotal + - mass + - total mass of molecule + - computed + * - Xc Yc Zc + - com + - coordinates of center-of-mass of molecule + - computed + * - Ixx Iyy Izz Ixy Ixz Iyz + - inertia + - 6 components of inertia tensor of molecule + - computed + +For *mass*, *com*, and *inertia*, the default is for LAMMPS to calculate +this quantity itself if needed, assuming the molecules consist of a set +of point particles or finite-size particles (with a non-zero diameter) +that do **not** overlap. If finite-size particles in the molecule +**do** overlap, LAMMPS will not account for the overlap effects when +calculating any of these 3 quantities, so you should pre-compute them +yourself and list the values in the file. The mass and center-of-mass coordinates (Xc,Yc,Zc) are self-explanatory. The 6 moments of inertia (ixx,iyy,izz,ixy,ixz,iyz) @@ -188,7 +261,7 @@ internally. These are the allowed section keywords for the body of the file. -* *Coords, Types, Molecules, Fragments, Charges, Diameters, Masses* = atom-property sections +* *Coords, Types, Molecules, Fragments, Charges, Diameters, Dipoles, Masses* = atom-property sections * *Bonds, Angles, Dihedrals, Impropers* = molecular topology sections * *Special Bond Counts, Special Bonds* = special neighbor info * *Shake Flags, Shake Atoms, Shake Bond Types* = SHAKE info @@ -303,6 +376,21 @@ not listed, the default diameter of each atom in the molecule is 1.0. ---------- +.. 
versionadded:: TBD + +*Dipoles* section: + +* one line per atom +* line syntax: ID mux muy muz +* mux,muy,muz = x-, y-, and z-component of point dipole vector of atom + +This section is only allowed for :doc:`atom styles ` that +support particles with point dipoles, e.g. atom_style dipole. If not +listed, the default dipole component of each atom in the molecule is set +to 0.0. + +---------- + *Masses* section: * one line per atom diff --git a/doc/src/pair_charmm.rst b/doc/src/pair_charmm.rst index 8ff6508dea..30b03ad872 100644 --- a/doc/src/pair_charmm.rst +++ b/doc/src/pair_charmm.rst @@ -16,6 +16,7 @@ .. index:: pair_style lj/charmm/coul/msm/omp .. index:: pair_style lj/charmmfsw/coul/charmmfsh .. index:: pair_style lj/charmmfsw/coul/long +.. index:: pair_style lj/charmmfsw/coul/long/kk pair_style lj/charmm/coul/charmm command ======================================== @@ -43,6 +44,8 @@ pair_style lj/charmmfsw/coul/charmmfsh command pair_style lj/charmmfsw/coul/long command ========================================= +Accelerator Variants: *lj/charmmfsw/coul/long/kk* + Syntax """""" @@ -281,7 +284,9 @@ page for more info. Related commands """""""""""""""" -:doc:`pair_coeff ` +:doc:`pair_coeff `, :doc:`angle_style charmm `, +:doc:`dihedral_style charmm `, +:doc:`dihedral_style charmmfsw `, :doc:`fix cmap ` Default """"""" diff --git a/doc/src/pair_lepton.rst b/doc/src/pair_lepton.rst index 21e619a3d9..5b5dc698e7 100644 --- a/doc/src/pair_lepton.rst +++ b/doc/src/pair_lepton.rst @@ -72,7 +72,7 @@ interactions between particles which depend on the distance and have a cutoff. The potential function must be provided as an expression string using "r" as the distance variable. With pair style *lepton/coul* one may additionally reference the charges of the two atoms of the pair with -"qi" and "qj", respectively. With pair style *lepton/coul* one may +"qi" and "qj", respectively. 
With pair style *lepton/sphere* one may instead reference the radii of the two atoms of the pair with "radi" and "radj", respectively; this is half of the diameter that can be set in :doc:`data files ` or the :doc:`set command `. @@ -166,8 +166,8 @@ mixing. Thus, expressions for *all* I,J pairs must be specified explicitly. Only pair style *lepton* supports the :doc:`pair_modify shift ` -option for shifting the energy of the pair interaction so that it is -0 at the cutoff, pair styles *lepton/coul* and *lepton/sphere* do *not*. +option for shifting the potential energy of the pair interaction so that +it is 0 at the cutoff, pair styles *lepton/coul* and *lepton/sphere* do *not*. The :doc:`pair_modify table ` options are not relevant for the these pair styles. diff --git a/doc/utils/sphinx-config/false_positives.txt b/doc/utils/sphinx-config/false_positives.txt index 03e67b95cb..55ac81e04b 100644 --- a/doc/utils/sphinx-config/false_positives.txt +++ b/doc/utils/sphinx-config/false_positives.txt @@ -151,6 +151,7 @@ asphericity Asq assignee assively +associativity Asta Astart Astop diff --git a/examples/PACKAGES/cgdna/util/generate.py b/examples/PACKAGES/cgdna/util/generate.py index cd7465acdb..e85661abb1 100644 --- a/examples/PACKAGES/cgdna/util/generate.py +++ b/examples/PACKAGES/cgdna/util/generate.py @@ -22,22 +22,26 @@ """ Import basic modules """ + +# for python2/3 compatibility +from __future__ import print_function + import sys, os, timeit from timeit import default_timer as timer start_time = timer() """ -Try to import numpy; if failed, import a local version mynumpy +Try to import numpy; if failed, import a local version mynumpy which needs to be provided """ try: import numpy as np except: - print >> sys.stderr, "numpy not found. Exiting." + print("numpy not found. 
Exiting.", file=sys.stderr) sys.exit(1) """ -Check that the required arguments (box offset and size in simulation units +Check that the required arguments (box offset and size in simulation units and the sequence file were provided """ try: @@ -45,8 +49,8 @@ try: box_length = float(sys.argv[2]) infile = sys.argv[3] except: - print >> sys.stderr, "Usage: %s <%s> <%s> <%s>" % (sys.argv[0], \ - "box offset", "box length", "file with sequences") + print( "Usage: %s <%s> <%s> <%s>" % (sys.argv[0], \ + "box offset", "box length", "file with sequences"), file=sys.stderr) sys.exit(1) box = np.array ([box_length, box_length, box_length]) @@ -57,8 +61,7 @@ try: inp = open (infile, 'r') inp.close() except: - print >> sys.stderr, "Could not open file '%s' for reading. \ - Aborting." % infile + print( "Could not open file '%s' for reading. Aborting." % infile, file=sys.stderr) sys.exit(2) # return parts of a string @@ -86,7 +89,7 @@ Define auxiliary variables for the construction of a helix # center of the double strand CM_CENTER_DS = POS_BASE + 0.2 -# ideal distance between base sites of two nucleotides +# ideal distance between base sites of two nucleotides # which are to be base paired in a duplex BASE_BASE = 0.3897628551303122 @@ -118,7 +121,7 @@ strandnum = [] bonds = [] -""" +""" Convert local body frame to quaternion DOF """ def exyz_to_quat (mya1, mya3): @@ -135,25 +138,25 @@ def exyz_to_quat (mya1, mya3): # compute other components from it if q0sq >= 0.25: - myquat[0] = np.sqrt(q0sq) - myquat[1] = (mya2[2] - mya3[1]) / (4.0*myquat[0]) - myquat[2] = (mya3[0] - mya1[2]) / (4.0*myquat[0]) - myquat[3] = (mya1[1] - mya2[0]) / (4.0*myquat[0]) + myquat[0] = np.sqrt(q0sq) + myquat[1] = (mya2[2] - mya3[1]) / (4.0*myquat[0]) + myquat[2] = (mya3[0] - mya1[2]) / (4.0*myquat[0]) + myquat[3] = (mya1[1] - mya2[0]) / (4.0*myquat[0]) elif q1sq >= 0.25: - myquat[1] = np.sqrt(q1sq) - myquat[0] = (mya2[2] - mya3[1]) / (4.0*myquat[1]) - myquat[2] = (mya2[0] + mya1[1]) / (4.0*myquat[1]) - 
myquat[3] = (mya1[2] + mya3[0]) / (4.0*myquat[1]) + myquat[1] = np.sqrt(q1sq) + myquat[0] = (mya2[2] - mya3[1]) / (4.0*myquat[1]) + myquat[2] = (mya2[0] + mya1[1]) / (4.0*myquat[1]) + myquat[3] = (mya1[2] + mya3[0]) / (4.0*myquat[1]) elif q2sq >= 0.25: - myquat[2] = np.sqrt(q2sq) - myquat[0] = (mya3[0] - mya1[2]) / (4.0*myquat[2]) - myquat[1] = (mya2[0] + mya1[1]) / (4.0*myquat[2]) - myquat[3] = (mya3[1] + mya2[2]) / (4.0*myquat[2]) + myquat[2] = np.sqrt(q2sq) + myquat[0] = (mya3[0] - mya1[2]) / (4.0*myquat[2]) + myquat[1] = (mya2[0] + mya1[1]) / (4.0*myquat[2]) + myquat[3] = (mya3[1] + mya2[2]) / (4.0*myquat[2]) elif q3sq >= 0.25: - myquat[3] = np.sqrt(q3sq) - myquat[0] = (mya1[1] - mya2[0]) / (4.0*myquat[3]) - myquat[1] = (mya3[0] + mya1[2]) / (4.0*myquat[3]) - myquat[2] = (mya3[1] + mya2[2]) / (4.0*myquat[3]) + myquat[3] = np.sqrt(q3sq) + myquat[0] = (mya1[1] - mya2[0]) / (4.0*myquat[3]) + myquat[1] = (mya3[0] + mya1[2]) / (4.0*myquat[3]) + myquat[2] = (mya3[1] + mya2[2]) / (4.0*myquat[3]) norm = 1.0/np.sqrt(myquat[0]*myquat[0] + myquat[1]*myquat[1] + \ myquat[2]*myquat[2] + myquat[3]*myquat[3]) @@ -169,62 +172,62 @@ Adds a strand to the system by appending it to the array of previous strands """ def add_strands (mynewpositions, mynewa1s, mynewa3s): overlap = False - - # This is a simple check for each of the particles where for previously - # placed particles i we check whether it overlaps with any of the + + # This is a simple check for each of the particles where for previously + # placed particles i we check whether it overlaps with any of the # newly created particles j - print >> sys.stdout, "## Checking for overlaps" + print( "## Checking for overlaps", file=sys.stdout) - for i in xrange(len(positions)): + for i in range(len(positions)): - p = positions[i] - pa1 = a1s[i] + p = positions[i] + pa1 = a1s[i] - for j in xrange (len(mynewpositions)): + for j in range (len(mynewpositions)): - q = mynewpositions[j] - qa1 = mynewa1s[j] + q = mynewpositions[j] + 
qa1 = mynewa1s[j] - # skip particles that are anyway too far away - dr = p - q - dr -= box * np.rint (dr / box) - if np.dot(dr, dr) > RC2: - continue + # skip particles that are anyway too far away + dr = p - q + dr -= box * np.rint(dr / box) + if np.dot(dr, dr) > RC2: + continue - # base site and backbone site of the two particles + # base site and backbone site of the two particles p_pos_back = p + pa1 * POS_BACK p_pos_base = p + pa1 * POS_BASE q_pos_back = q + qa1 * POS_BACK q_pos_base = q + qa1 * POS_BASE - # check for no overlap between the two backbone sites + # check for no overlap between the two backbone sites dr = p_pos_back - q_pos_back - dr -= box * np.rint (dr / box) + dr -= box * np.rint(dr / box) if np.dot(dr, dr) < RC2_BACK: overlap = True - # check for no overlap between the two base sites + # check for no overlap between the two base sites dr = p_pos_base - q_pos_base - dr -= box * np.rint (dr / box) + dr -= box * np.rint(dr / box) if np.dot(dr, dr) < RC2_BASE: overlap = True - # check for no overlap between backbone site of particle p - # with base site of particle q + # check for no overlap between backbone site of particle p + # with base site of particle q dr = p_pos_back - q_pos_base dr -= box * np.rint (dr / box) if np.dot(dr, dr) < RC2_BACK_BASE: overlap = True - # check for no overlap between base site of particle p and - # backbone site of particle q + # check for no overlap between base site of particle p and + # backbone site of particle q dr = p_pos_base - q_pos_back dr -= box * np.rint (dr / box) if np.dot(dr, dr) < RC2_BACK_BASE: overlap = True - # exit if there is an overlap + # exit if there is an overlap if overlap: return False @@ -237,10 +240,10 @@ def add_strands (mynewpositions, mynewa1s, mynewa3s): a1s.append (p) for p in mynewa3s: a3s.append (p) - # calculate quaternion from local body frame and append - for ia in xrange(len(mynewpositions)): - mynewquaternions = exyz_to_quat(mynewa1s[ia],mynewa3s[ia]) - 
quaternions.append(mynewquaternions) + # calculate quaternion from local body frame and append + for ia in range(len(mynewpositions)): + mynewquaternions = exyz_to_quat(mynewa1s[ia],mynewa3s[ia]) + quaternions.append(mynewquaternions) return True @@ -281,7 +284,7 @@ def get_rotation_matrix(axis, anglest): [olc*x*z-st*y, olc*y*z+st*x, olc*z*z+ct]]) """ -Generates the position and orientation vectors of a +Generates the position and orientation vectors of a (single or double) strand from a sequence string """ def generate_strand(bp, sequence=None, start_pos=np.array([0, 0, 0]), \ @@ -295,76 +298,75 @@ def generate_strand(bp, sequence=None, start_pos=np.array([0, 0, 0]), \ # overall direction of the helix dir = np.array(dir, dtype=float) if sequence == None: - sequence = np.random.randint(1, 5, bp) + sequence = np.random.randint(1, 5, bp) - # the elseif here is most likely redundant + # the elseif here is most likely redundant elif len(sequence) != bp: - n = bp - len(sequence) - sequence += np.random.randint(1, 5, n) - print >> sys.stderr, "sequence is too short, adding %d random bases" % n + n = bp - len(sequence) + sequence += np.random.randint(1, 5, n) + print( "sequence is too short, adding %d random bases" % n, file=sys.stderr) # normalize direction dir_norm = np.sqrt(np.dot(dir,dir)) if dir_norm < 1e-10: - print >> sys.stderr, "direction must be a valid vector, \ - defaulting to (0, 0, 1)" - dir = np.array([0, 0, 1]) + print( "direction must be a valid vector, defaulting to (0, 0, 1)", file=sys.stderr) + dir = np.array([0, 0, 1]) else: dir /= dir_norm # find a vector orthogonal to dir to act as helix direction, # if not provided switch off random orientation if perp is None or perp is False: - v1 = np.random.random_sample(3) - v1 -= dir * (np.dot(dir, v1)) - v1 /= np.sqrt(sum(v1*v1)) + v1 = np.random.random_sample(3) + v1 -= dir * (np.dot(dir, v1)) + v1 /= np.sqrt(sum(v1*v1)) else: - v1 = perp; + v1 = perp; # generate rotational matrix representing the overall 
rotation of the helix R0 = get_rotation_matrix(dir, rot) - + # rotation matrix corresponding to one step along the helix R = get_rotation_matrix(dir, [1, "bp"]) - # set the vector a1 (backbone to base) to v1 + # set the vector a1 (backbone to base) to v1 a1 = v1 - - # apply the global rotation to a1 + + # apply the global rotation to a1 a1 = np.dot(R0, a1) - + # set the position of the fist backbone site to start_pos rb = np.array(start_pos) - + # set a3 to the direction of the helix a3 = dir for i in range(bp): # work out the position of the centre of mass of the nucleotide - rcdm = rb - CM_CENTER_DS * a1 - - # append to newpositions - mynewpositions.append(rcdm) - mynewa1s.append(a1) - mynewa3s.append(a3) - - # if we are not at the end of the helix, we work out a1 and rb for the - # next nucleotide along the helix - if i != bp - 1: - a1 = np.dot(R, a1) - rb += a3 * BASE_BASE + rcdm = rb - CM_CENTER_DS * a1 - # if we are working on a double strand, we do a cycle similar + # append to newpositions + mynewpositions.append(rcdm) + mynewa1s.append(a1) + mynewa3s.append(a3) + + # if we are not at the end of the helix, we work out a1 and rb for the + # next nucleotide along the helix + if i != bp - 1: + a1 = np.dot(R, a1) + rb += a3 * BASE_BASE + + # if we are working on a double strand, we do a cycle similar # to the previous one but backwards if double == True: - a1 = -a1 - a3 = -dir - R = R.transpose() - for i in range(bp): - rcdm = rb - CM_CENTER_DS * a1 - mynewpositions.append (rcdm) - mynewa1s.append (a1) - mynewa3s.append (a3) - a1 = np.dot(R, a1) - rb += a3 * BASE_BASE + a1 = -a1 + a3 = -dir + R = R.transpose() + for i in range(bp): + rcdm = rb - CM_CENTER_DS * a1 + mynewpositions.append (rcdm) + mynewa1s.append (a1) + mynewa3s.append (a3) + a1 = np.dot(R, a1) + rb += a3 * BASE_BASE assert (len (mynewpositions) > 0) @@ -391,10 +393,10 @@ def read_strands(filename): try: infile = open (filename) except: - print >> sys.stderr, "Could not open file '%s'. Aborting." 
% filename + print( "Could not open file '%s'. Aborting." % filename, file=sys.stderr ) sys.exit(2) - # This block works out the number of nucleotides and strands by reading + # This block works out the number of nucleotides and strands by reading # the number of non-empty lines in the input file and the number of letters, # taking the possible DOUBLE keyword into account. nstrands, nnucl, nbonds = 0, 0, 0 @@ -406,30 +408,29 @@ def read_strands(filename): if line[:6] == 'DOUBLE': line = line.split()[1] length = len(line) - print >> sys.stdout, "## Found duplex of %i base pairs" % length + print( "## Found duplex of %i base pairs" % length, file=sys.stdout) nnucl += 2*length nstrands += 2 - nbonds += (2*length-2) + nbonds += (2*length-2) else: line = line.split()[0] length = len(line) - print >> sys.stdout, \ - "## Found single strand of %i bases" % length + print( "## Found single strand of %i bases" % length, file=sys.stdout) nnucl += length nstrands += 1 - nbonds += length-1 + nbonds += length-1 # rewind the sequence input file infile.seek(0) - print >> sys.stdout, "## nstrands, nnucl = ", nstrands, nnucl + print( "## nstrands, nnucl = ", nstrands, nnucl, file=sys.stdout) # generate the data file in LAMMPS format try: out = open ("data.oxdna", "w") except: - print >> sys.stderr, "Could not open data file for writing. Aborting." + print( "Could not open data file for writing. 
Aborting.", file=sys.stderr) sys.exit(2) - + lines = infile.readlines() nlines = len(lines) i = 1 @@ -440,115 +441,114 @@ def read_strands(filename): line = line.upper().strip() # skip empty lines - if len(line) == 0: - i += 1 - continue + if len(line) == 0: + i += 1 + continue - # block for duplexes: last argument of the generate function - # is set to 'True' + # block for duplexes: last argument of the generate function + # is set to 'True' if line[:6] == 'DOUBLE': line = line.split()[1] length = len(line) seq = [(base_to_number[x]) for x in line] - myns += 1 - for b in xrange(length): - basetype.append(seq[b]) - strandnum.append(myns) + myns += 1 + for b in range(length): + basetype.append(seq[b]) + strandnum.append(myns) - for b in xrange(length-1): - bondpair = [noffset + b, noffset + b + 1] - bonds.append(bondpair) - noffset += length + for b in range(length-1): + bondpair = [noffset + b, noffset + b + 1] + bonds.append(bondpair) + noffset += length - # create the sequence of the second strand as made of - # complementary bases - seq2 = [5-s for s in seq] - seq2.reverse() + # create the sequence of the second strand as made of + # complementary bases + seq2 = [5-s for s in seq] + seq2.reverse() - myns += 1 - for b in xrange(length): - basetype.append(seq2[b]) - strandnum.append(myns) + myns += 1 + for b in range(length): + basetype.append(seq2[b]) + strandnum.append(myns) - for b in xrange(length-1): - bondpair = [noffset + b, noffset + b + 1] - bonds.append(bondpair) - noffset += length - - print >> sys.stdout, "## Created duplex of %i bases" % (2*length) + for b in range(length-1): + bondpair = [noffset + b, noffset + b + 1] + bonds.append(bondpair) + noffset += length - # generate random position of the first nucleotide + print( "## Created duplex of %i bases" % (2*length), file=sys.stdout) + + # generate random position of the first nucleotide cdm = box_offset + np.random.random_sample(3) * box - # generate the random direction of the helix + # generate 
the random direction of the helix axis = np.random.random_sample(3) axis /= np.sqrt(np.dot(axis, axis)) - # use the generate function defined above to create - # the position and orientation vector of the strand + # use the generate function defined above to create + # the position and orientation vector of the strand newpositions, newa1s, newa3s = generate_strand(len(line), \ - sequence=seq, dir=axis, start_pos=cdm, double=True) + sequence=seq, dir=axis, start_pos=cdm, double=True) # generate a new position for the strand until it does not overlap - # with anything already present - start = timer() + # with anything already present + start = timer() while not add_strands(newpositions, newa1s, newa3s): cdm = box_offset + np.random.random_sample(3) * box axis = np.random.random_sample(3) axis /= np.sqrt(np.dot(axis, axis)) newpositions, newa1s, newa3s = generate_strand(len(line), \ - sequence=seq, dir=axis, start_pos=cdm, double=True) - print >> sys.stdout, "## Trying %i" % i - end = timer() - print >> sys.stdout, "## Added duplex of %i bases (line %i/%i) in %.2fs, now at %i/%i" % \ - (2*length, i, nlines, end-start, len(positions), nnucl) + sequence=seq, dir=axis, start_pos=cdm, double=True) + print( "## Trying %i" % i, file=sys.stdout) + end = timer() + print( "## Added duplex of %i bases (line %i/%i) in %.2fs, now at %i/%i" % \ + (2*length, i, nlines, end-start, len(positions), nnucl), file=sys.stdout) - # block for single strands: last argument of the generate function - # is set to 'False' + # block for single strands: last argument of the generate function + # is set to 'False' else: length = len(line) seq = [(base_to_number[x]) for x in line] - myns += 1 - for b in xrange(length): - basetype.append(seq[b]) - strandnum.append(myns) + myns += 1 + for b in range(length): + basetype.append(seq[b]) + strandnum.append(myns) - for b in xrange(length-1): - bondpair = [noffset + b, noffset + b + 1] - bonds.append(bondpair) - noffset += length + for b in 
range(length-1): + bondpair = [noffset + b, noffset + b + 1] + bonds.append(bondpair) + noffset += length - # generate random position of the first nucleotide + # generate random position of the first nucleotide cdm = box_offset + np.random.random_sample(3) * box - # generate the random direction of the helix + # generate the random direction of the helix axis = np.random.random_sample(3) axis /= np.sqrt(np.dot(axis, axis)) - print >> sys.stdout, \ - "## Created single strand of %i bases" % length + print("## Created single strand of %i bases" % length, file=sys.stdout) newpositions, newa1s, newa3s = generate_strand(length, \ sequence=seq, dir=axis, start_pos=cdm, double=False) - start = timer() + start = timer() while not add_strands(newpositions, newa1s, newa3s): cdm = box_offset + np.random.random_sample(3) * box axis = np.random.random_sample(3) - axis /= np.sqrt(np.dot(axis, axis)) + axis /= np.sqrt(np.dot(axis, axis)) newpositions, newa1s, newa3s = generate_strand(length, \ - sequence=seq, dir=axis, start_pos=cdm, double=False) + sequence=seq, dir=axis, start_pos=cdm, double=False) print >> sys.stdout, "## Trying %i" % (i) - end = timer() - print >> sys.stdout, "## Added single strand of %i bases (line %i/%i) in %.2fs, now at %i/%i" % \ - (length, i, nlines, end-start,len(positions), nnucl) + end = timer() + print( "## Added single strand of %i bases (line %i/%i) in %.2fs, now at %i/%i" % \ + (length, i, nlines, end-start,len(positions), nnucl), file=sys.stdout) i += 1 # sanity check if not len(positions) == nnucl: - print len(positions), nnucl + print( len(positions), nnucl ) raise AssertionError out.write('# LAMMPS data file\n') @@ -580,44 +580,41 @@ def read_strands(filename): out.write('Atoms\n') out.write('\n') - for i in xrange(nnucl): - out.write('%d %d %22.15le %22.15le %22.15le %d 1 1\n' \ - % (i+1, basetype[i], \ - positions[i][0], positions[i][1], positions[i][2], \ - strandnum[i])) + for i in range(nnucl): + out.write('%d %d %22.15le %22.15le 
%22.15le %d 1 1\n' \ + % (i+1, basetype[i], positions[i][0], positions[i][1], positions[i][2], strandnum[i])) out.write('\n') out.write('# Atom-ID, translational, rotational velocity\n') out.write('Velocities\n') out.write('\n') - for i in xrange(nnucl): - out.write("%d %22.15le %22.15le %22.15le %22.15le %22.15le %22.15le\n" \ - % (i+1,0.0,0.0,0.0,0.0,0.0,0.0)) + for i in range(nnucl): + out.write("%d %22.15le %22.15le %22.15le %22.15le %22.15le %22.15le\n" \ + % (i+1,0.0,0.0,0.0,0.0,0.0,0.0)) out.write('\n') out.write('# Atom-ID, shape, quaternion\n') out.write('Ellipsoids\n') out.write('\n') - for i in xrange(nnucl): - out.write(\ - "%d %22.15le %22.15le %22.15le %22.15le %22.15le %22.15le %22.15le\n" \ - % (i+1,1.1739845031423408,1.1739845031423408,1.1739845031423408, \ - quaternions[i][0],quaternions[i][1], quaternions[i][2],quaternions[i][3])) - + for i in range(nnucl): + out.write("%d %22.15le %22.15le %22.15le %22.15le %22.15le %22.15le %22.15le\n" \ + % (i+1,1.1739845031423408,1.1739845031423408,1.1739845031423408, \ + quaternions[i][0],quaternions[i][1], quaternions[i][2],quaternions[i][3])) + out.write('\n') out.write('# Bond topology\n') out.write('Bonds\n') out.write('\n') - for i in xrange(nbonds): - out.write("%d %d %d %d\n" % (i+1,1,bonds[i][0],bonds[i][1])) + for i in range(nbonds): + out.write("%d %d %d %d\n" % (i+1,1,bonds[i][0],bonds[i][1])) out.close() - print >> sys.stdout, "## Wrote data to 'data.oxdna'" - print >> sys.stdout, "## DONE" + print("## Wrote data to 'data.oxdna'", file=sys.stdout) + print("## DONE", file=sys.stdout) # call the above main() function, which executes the program read_strands (infile) @@ -627,4 +624,6 @@ runtime = end_time-start_time hours = runtime/3600 minutes = (runtime-np.rint(hours)*3600)/60 seconds = (runtime-np.rint(hours)*3600-np.rint(minutes)*60)%60 -print >> sys.stdout, "## Total runtime %ih:%im:%.2fs" % (hours,minutes,seconds) +print( "## Total runtime %ih:%im:%.2fs" % (hours,minutes,seconds), 
file=sys.stdout) + + diff --git a/examples/PACKAGES/cgdna/util/generate_simple.py b/examples/PACKAGES/cgdna/util/generate_simple.py index 33cf1ee7f5..7702bfc7f5 100644 --- a/examples/PACKAGES/cgdna/util/generate_simple.py +++ b/examples/PACKAGES/cgdna/util/generate_simple.py @@ -1,5 +1,8 @@ # Setup tool for oxDNA input in LAMMPS format. +# for python2/3 compatibility +from __future__ import print_function + import math,numpy as np,sys,os # system size @@ -250,59 +253,59 @@ def duplex_array(): qrot3=math.sin(0.5*twist) for letter in strand[2]: - temp1=[] - temp2=[] + temp1=[] + temp2=[] - temp1.append(nt2num[letter]) - temp2.append(compnt2num[letter]) + temp1.append(nt2num[letter]) + temp2.append(compnt2num[letter]) - temp1.append([posx1,posy1,posz1]) - temp2.append([posx2,posy2,posz2]) + temp1.append([posx1,posy1,posz1]) + temp2.append([posx2,posy2,posz2]) - vel=[0,0,0,0,0,0] - temp1.append(vel) - temp2.append(vel) + vel=[0,0,0,0,0,0] + temp1.append(vel) + temp2.append(vel) - temp1.append(shape) - temp2.append(shape) + temp1.append(shape) + temp2.append(shape) - temp1.append(quat1) - temp2.append(quat2) + temp1.append(quat1) + temp2.append(quat2) - quat1_0 = quat1[0]*qrot0 - quat1[1]*qrot1 - quat1[2]*qrot2 - quat1[3]*qrot3 - quat1_1 = quat1[0]*qrot1 + quat1[1]*qrot0 + quat1[2]*qrot3 - quat1[3]*qrot2 - quat1_2 = quat1[0]*qrot2 + quat1[2]*qrot0 + quat1[3]*qrot1 - quat1[1]*qrot3 - quat1_3 = quat1[0]*qrot3 + quat1[3]*qrot0 + quat1[1]*qrot2 + quat1[2]*qrot1 + quat1_0 = quat1[0]*qrot0 - quat1[1]*qrot1 - quat1[2]*qrot2 - quat1[3]*qrot3 + quat1_1 = quat1[0]*qrot1 + quat1[1]*qrot0 + quat1[2]*qrot3 - quat1[3]*qrot2 + quat1_2 = quat1[0]*qrot2 + quat1[2]*qrot0 + quat1[3]*qrot1 - quat1[1]*qrot3 + quat1_3 = quat1[0]*qrot3 + quat1[3]*qrot0 + quat1[1]*qrot2 + quat1[2]*qrot1 - quat1 = [quat1_0,quat1_1,quat1_2,quat1_3] + quat1 = [quat1_0,quat1_1,quat1_2,quat1_3] - posx1=axisx - dcomh*(quat1[0]**2+quat1[1]**2-quat1[2]**2-quat1[3]**2) - posy1=axisy - 
dcomh*(2*(quat1[1]*quat1[2]+quat1[0]*quat1[3])) - posz1=posz1+risez + posx1=axisx - dcomh*(quat1[0]**2+quat1[1]**2-quat1[2]**2-quat1[3]**2) + posy1=axisy - dcomh*(2*(quat1[1]*quat1[2]+quat1[0]*quat1[3])) + posz1=posz1+risez - quat2_0 = quat2[0]*qrot0 - quat2[1]*qrot1 - quat2[2]*qrot2 + quat2[3]*qrot3 - quat2_1 = quat2[0]*qrot1 + quat2[1]*qrot0 - quat2[2]*qrot3 - quat2[3]*qrot2 - quat2_2 = quat2[0]*qrot2 + quat2[2]*qrot0 + quat2[3]*qrot1 + quat2[1]*qrot3 - quat2_3 =-quat2[0]*qrot3 + quat2[3]*qrot0 + quat2[1]*qrot2 + quat2[2]*qrot1 + quat2_0 = quat2[0]*qrot0 - quat2[1]*qrot1 - quat2[2]*qrot2 + quat2[3]*qrot3 + quat2_1 = quat2[0]*qrot1 + quat2[1]*qrot0 - quat2[2]*qrot3 - quat2[3]*qrot2 + quat2_2 = quat2[0]*qrot2 + quat2[2]*qrot0 + quat2[3]*qrot1 + quat2[1]*qrot3 + quat2_3 =-quat2[0]*qrot3 + quat2[3]*qrot0 + quat2[1]*qrot2 + quat2[2]*qrot1 - quat2 = [quat2_0,quat2_1,quat2_2,quat2_3] + quat2 = [quat2_0,quat2_1,quat2_2,quat2_3] - posx2=axisx + dcomh*(quat1[0]**2+quat1[1]**2-quat1[2]**2-quat1[3]**2) - posy2=axisy + dcomh*(2*(quat1[1]*quat1[2]+quat1[0]*quat1[3])) - posz2=posz1 + posx2=axisx + dcomh*(quat1[0]**2+quat1[1]**2-quat1[2]**2-quat1[3]**2) + posy2=axisy + dcomh*(2*(quat1[1]*quat1[2]+quat1[0]*quat1[3])) + posz2=posz1 - if (len(nucleotide)+1 > strandstart): - topology.append([1,len(nucleotide),len(nucleotide)+1]) - comptopo.append([1,len(nucleotide)+len(strand[2]),len(nucleotide)+len(strand[2])+1]) + if (len(nucleotide)+1 > strandstart): + topology.append([1,len(nucleotide),len(nucleotide)+1]) + comptopo.append([1,len(nucleotide)+len(strand[2]),len(nucleotide)+len(strand[2])+1]) - nucleotide.append(temp1) - compstrand.append(temp2) + nucleotide.append(temp1) + compstrand.append(temp2) for ib in range(len(compstrand)): - nucleotide.append(compstrand[len(compstrand)-1-ib]) + nucleotide.append(compstrand[len(compstrand)-1-ib]) for ib in range(len(comptopo)): - topology.append(comptopo[ib]) + topology.append(comptopo[ib]) return diff --git 
a/examples/PACKAGES/reaction/create_atoms_polystyrene/in.grow_styrene b/examples/PACKAGES/reaction/create_atoms_polystyrene/in.grow_styrene index 7860db4e55..dcca29c026 100644 --- a/examples/PACKAGES/reaction/create_atoms_polystyrene/in.grow_styrene +++ b/examples/PACKAGES/reaction/create_atoms_polystyrene/in.grow_styrene @@ -40,7 +40,7 @@ fix 1 statted_grp_REACT nvt temp $T $T 100 fix 4 bond_react_MASTER_group temp/rescale 1 $T $T 1 1 -thermo_style custom step temp press density f_myrxns[1] +thermo_style custom step temp press density f_myrxns[*] thermo 100 diff --git a/examples/PACKAGES/reaction/nylon,6-6_melt/in.large_nylon_melt b/examples/PACKAGES/reaction/nylon,6-6_melt/in.large_nylon_melt index 9678a714d6..635b2c9750 100644 --- a/examples/PACKAGES/reaction/nylon,6-6_melt/in.large_nylon_melt +++ b/examples/PACKAGES/reaction/nylon,6-6_melt/in.large_nylon_melt @@ -26,7 +26,7 @@ read_data large_nylon_melt.data.gz & extra/angle/per/atom 15 & extra/dihedral/per/atom 15 & extra/improper/per/atom 25 & - extra/special/per/atom 25 + extra/special/per/atom 25 velocity all create 800.0 4928459 dist gaussian @@ -50,7 +50,7 @@ fix 1 statted_grp_REACT nvt temp 800 800 100 # you can use the internally created 'bond_react_MASTER_group', like so: # fix 2 bond_react_MASTER_group temp/rescale 1 800 800 10 1 -thermo_style custom step temp press density f_myrxns[1] f_myrxns[2] # cumulative reaction counts +thermo_style custom step temp press density f_myrxns[*] # cumulative reaction counts # restart 100 restart1 restart2 diff --git a/examples/PACKAGES/reaction/tiny_epoxy/in.tiny_epoxy.stabilized b/examples/PACKAGES/reaction/tiny_epoxy/in.tiny_epoxy.stabilized index 57b03b630f..7e0350cdb0 100644 --- a/examples/PACKAGES/reaction/tiny_epoxy/in.tiny_epoxy.stabilized +++ b/examples/PACKAGES/reaction/tiny_epoxy/in.tiny_epoxy.stabilized @@ -20,7 +20,8 @@ improper_style class2 special_bonds lj/coul 0 0 1 pair_modify tail yes mix sixthpower -read_data tiny_epoxy.data +read_data 
tiny_epoxy.data & + extra/special/per/atom 25 velocity all create 300.0 4928459 dist gaussian @@ -44,7 +45,7 @@ fix rxns all bond/react stabilization yes statted_grp .03 & fix 1 statted_grp_REACT nvt temp 300 300 100 -thermo_style custom step temp f_rxns[1] f_rxns[2] f_rxns[3] f_rxns[4] +thermo_style custom step temp f_rxns[*] run 2000 diff --git a/examples/PACKAGES/reaction/tiny_nylon/in.tiny_nylon.stabilized b/examples/PACKAGES/reaction/tiny_nylon/in.tiny_nylon.stabilized index 95b39033db..853bc45f1e 100644 --- a/examples/PACKAGES/reaction/tiny_nylon/in.tiny_nylon.stabilized +++ b/examples/PACKAGES/reaction/tiny_nylon/in.tiny_nylon.stabilized @@ -50,7 +50,7 @@ fix 1 statted_grp_REACT nvt temp 300 300 100 # by using the internally-created 'bond_react_MASTER_group', like so: fix 4 bond_react_MASTER_group temp/rescale 1 300 300 10 1 -thermo_style custom step temp press density f_myrxns[1] f_myrxns[2] +thermo_style custom step temp press density f_myrxns[*] # restart 100 restart1 restart2 diff --git a/examples/PACKAGES/reaction/tiny_nylon/in.tiny_nylon.stabilized_variable_probability b/examples/PACKAGES/reaction/tiny_nylon/in.tiny_nylon.stabilized_variable_probability index 88b5a95a41..f3c32f3cbd 100644 --- a/examples/PACKAGES/reaction/tiny_nylon/in.tiny_nylon.stabilized_variable_probability +++ b/examples/PACKAGES/reaction/tiny_nylon/in.tiny_nylon.stabilized_variable_probability @@ -54,7 +54,7 @@ fix 1 statted_grp_REACT nvt temp 300 300 100 # by using the internally-created 'bond_react_MASTER_group', like so: fix 4 bond_react_MASTER_group temp/rescale 1 300 300 10 1 -thermo_style custom step temp press density v_prob1 v_prob2 f_myrxns[1] f_myrxns[2] +thermo_style custom step temp press density v_prob1 v_prob2 f_myrxns[*] # restart 100 restart1 restart2 diff --git a/examples/PACKAGES/reaction/tiny_nylon/in.tiny_nylon.unstabilized b/examples/PACKAGES/reaction/tiny_nylon/in.tiny_nylon.unstabilized index a569e28d43..e5cbaaaf86 100644 --- 
a/examples/PACKAGES/reaction/tiny_nylon/in.tiny_nylon.unstabilized +++ b/examples/PACKAGES/reaction/tiny_nylon/in.tiny_nylon.unstabilized @@ -47,7 +47,7 @@ fix myrxns all bond/react stabilization no & fix 1 all nve/limit .03 -thermo_style custom step temp press density f_myrxns[1] f_myrxns[2] +thermo_style custom step temp press density f_myrxns[*] # restart 100 restart1 restart2 diff --git a/examples/PACKAGES/reaction/tiny_polystyrene/in.tiny_polystyrene.stabilized b/examples/PACKAGES/reaction/tiny_polystyrene/in.tiny_polystyrene.stabilized index 4ecc481719..230998fcd3 100644 --- a/examples/PACKAGES/reaction/tiny_polystyrene/in.tiny_polystyrene.stabilized +++ b/examples/PACKAGES/reaction/tiny_polystyrene/in.tiny_polystyrene.stabilized @@ -51,7 +51,7 @@ fix 1 statted_grp_REACT nvt temp $T $T 100 fix 4 bond_react_MASTER_group temp/rescale 1 $T $T 1 1 -thermo_style custom step temp press density f_rxn1[1] f_rxn1[2] f_rxn1[3] +thermo_style custom step temp press density f_rxn1[*] run 10000 diff --git a/lib/gpu/lal_base_sph.h b/lib/gpu/lal_base_sph.h index e1e5731573..d37e85f170 100644 --- a/lib/gpu/lal_base_sph.h +++ b/lib/gpu/lal_base_sph.h @@ -15,7 +15,7 @@ ***************************************************************************/ #ifndef LAL_BASE_SPH_H -#define LAL_BASE_DPD_H +#define LAL_BASE_SPH_H #include "lal_device.h" #include "lal_balance.h" diff --git a/lib/gpu/lal_coul_slater_long.cu b/lib/gpu/lal_coul_slater_long.cu index 1fc8ab8be4..49cf47b8b3 100644 --- a/lib/gpu/lal_coul_slater_long.cu +++ b/lib/gpu/lal_coul_slater_long.cu @@ -102,6 +102,7 @@ __kernel void k_coul_slater_long(const __global numtyp4 *restrict x_, numtyp t = ucl_recip((numtyp)1.0 + EWALD_P*grij); _erfc = t * (A1+t*(A2+t*(A3+t*(A4+t*A5)))) * expm2; fetch(prefactor,j,q_tex); + prefactor *= qqrd2e * scale[mtype] * qtmp/r; numtyp rlamdainv = r * lamdainv; numtyp exprlmdainv = ucl_exp((numtyp)-2.0*rlamdainv); numtyp slater_term = exprlmdainv*((numtyp)1.0 + 
((numtyp)2.0*rlamdainv*((numtyp)1.0+rlamdainv))); diff --git a/lib/gpu/lal_coul_slater_long.h b/lib/gpu/lal_coul_slater_long.h index 8950fd81ef..1731992a16 100644 --- a/lib/gpu/lal_coul_slater_long.h +++ b/lib/gpu/lal_coul_slater_long.h @@ -13,8 +13,8 @@ email : ndactrung@gmail.com ***************************************************************************/ -#ifndef LAL_Coul_Slater_Long_H -#define LAL_Coul_Slater_Long_H +#ifndef LAL_COUL_SLATER_LONG_H +#define LAL_COUL_SLATER_LONG_H #include "lal_base_charge.h" diff --git a/lib/gpu/lal_eam.cpp b/lib/gpu/lal_eam.cpp index b7bc7b958a..0a2ed21ab3 100644 --- a/lib/gpu/lal_eam.cpp +++ b/lib/gpu/lal_eam.cpp @@ -303,7 +303,7 @@ double EAMT::host_memory_usage() const { } // --------------------------------------------------------------------------- -// Copy nbor list from host if necessary and then compute atom energies/forces +// Copy nbor list from host if necessary and then compute per-atom fp // --------------------------------------------------------------------------- template void EAMT::compute(const int f_ago, const int inum_full, const int nlocal, @@ -379,7 +379,7 @@ void EAMT::compute(const int f_ago, const int inum_full, const int nlocal, } // --------------------------------------------------------------------------- -// Reneighbor on GPU and then compute per-atom densities +// Reneighbor on GPU and then compute per-atom fp // --------------------------------------------------------------------------- template int** EAMT::compute(const int ago, const int inum_full, const int nall, @@ -461,7 +461,7 @@ int** EAMT::compute(const int ago, const int inum_full, const int nall, } // --------------------------------------------------------------------------- -// Copy nbor list from host if necessary and then calculate forces, virials,.. +// Update per-atom fp, and then calculate forces, virials,.. 
// --------------------------------------------------------------------------- template void EAMT::compute2(int *ilist, const bool eflag, const bool vflag, diff --git a/lib/gpu/lal_sph_heatconduction.h b/lib/gpu/lal_sph_heatconduction.h index 23241e8c92..cd7a46e3bd 100644 --- a/lib/gpu/lal_sph_heatconduction.h +++ b/lib/gpu/lal_sph_heatconduction.h @@ -13,8 +13,8 @@ email : ndactrung@gmail.com ***************************************************************************/ -#ifndef LAL_SPH_LJ_H -#define LAL_SPH_LJ_H +#ifndef LAL_SPH_HEATCONDUCTION_H +#define LAL_SPH_HEATCONDUCTION_H #include "lal_base_sph.h" diff --git a/lib/kokkos/core/src/Kokkos_Printf.hpp b/lib/kokkos/core/src/Kokkos_Printf.hpp index 39f95825c3..af20221a5a 100644 --- a/lib/kokkos/core/src/Kokkos_Printf.hpp +++ b/lib/kokkos/core/src/Kokkos_Printf.hpp @@ -31,7 +31,7 @@ namespace Kokkos { // backends. The GPU backends always return 1 and NVHPC only compiles if we // don't ask for the return value. template -KOKKOS_FUNCTION void printf(const char* format, Args... args) { +KOKKOS_FORCEINLINE_FUNCTION void printf(const char* format, Args... 
args) { #ifdef KOKKOS_ENABLE_SYCL // Some compilers warn if "args" is empty and format is not a string literal if constexpr (sizeof...(Args) == 0) diff --git a/lib/kokkos/core/src/OpenMP/Kokkos_OpenMP_Instance.hpp b/lib/kokkos/core/src/OpenMP/Kokkos_OpenMP_Instance.hpp index 03f5fff395..4586406e16 100644 --- a/lib/kokkos/core/src/OpenMP/Kokkos_OpenMP_Instance.hpp +++ b/lib/kokkos/core/src/OpenMP/Kokkos_OpenMP_Instance.hpp @@ -219,8 +219,6 @@ KOKKOS_DEPRECATED void OpenMP::partition_master(F const& f, int num_partitions, Exec::validate_partition_impl(prev_instance->m_pool_size, num_partitions, partition_size); - OpenMP::memory_space space; - #pragma omp parallel num_threads(num_partitions) { Exec thread_local_instance(partition_size); diff --git a/src/.gitignore b/src/.gitignore index 112a1486f7..1e4c5b9ddb 100644 --- a/src/.gitignore +++ b/src/.gitignore @@ -348,6 +348,8 @@ /compute_nbond_atom.h /fix_nve_bpm_sphere.cpp /fix_nve_bpm_sphere.h +/fix_update_special_bonds.cpp +/fix_update_special_bonds.h /pair_bpm_spring.cpp /pair_bpm_spring.h diff --git a/src/BPM/bond_bpm.cpp b/src/BPM/bond_bpm.cpp index 3ebeed3f1d..b484df7fab 100644 --- a/src/BPM/bond_bpm.cpp +++ b/src/BPM/bond_bpm.cpp @@ -224,7 +224,7 @@ void BondBPM::settings(int narg, char **arg) ifix = modify->get_fix_by_id(id_fix_prop_atom); if (!ifix) - ifix = modify->add_fix(fmt::format("{} all property/atom {} {} {} ghost yes", + ifix = modify->add_fix(fmt::format("{} all property/atom d_{} d_{} d_{} ghost yes", id_fix_prop_atom, x_ref_id, y_ref_id, z_ref_id)); int type_flag; diff --git a/src/fix_update_special_bonds.cpp b/src/BPM/fix_update_special_bonds.cpp similarity index 73% rename from src/fix_update_special_bonds.cpp rename to src/BPM/fix_update_special_bonds.cpp index 159b2a1170..b6bf8b433f 100644 --- a/src/fix_update_special_bonds.cpp +++ b/src/BPM/fix_update_special_bonds.cpp @@ -20,6 +20,7 @@ #include "force.h" #include "modify.h" #include "neigh_list.h" +#include "neighbor.h" #include "pair.h" 
#include @@ -61,7 +62,8 @@ void FixUpdateSpecialBonds::setup(int /*vflag*/) // Require atoms know about all of their bonds and if they break if (force->newton_bond) error->all(FLERR, "Fix update/special/bonds requires Newton bond off"); - if (!atom->avec->bonds_allow) error->all(FLERR, "Fix update/special/bonds requires atom bonds"); + if (!atom->avec->bonds_allow) + error->all(FLERR, "Fix update/special/bonds requires an atom style supporting bonds"); // special lj must be 0 1 1 to censor pair forces between bonded particles // special coulomb must be 1 1 1 to ensure all pairs are included in the @@ -72,9 +74,6 @@ void FixUpdateSpecialBonds::setup(int /*vflag*/) force->special_coul[3] != 1.0) error->all(FLERR, "Fix update/special/bonds requires special Coulomb weights = 1,1,1"); // Implies neighbor->special_flag = [X, 2, 1, 1] - - if (utils::strmatch(force->pair_style, "^hybrid")) - error->all(FLERR, "Cannot use fix update/special/bonds with hybrid pair styles"); } /* ---------------------------------------------------------------------- @@ -158,69 +157,86 @@ void FixUpdateSpecialBonds::pre_force(int /*vflag*/) int i1, i2, j, jj, jnum; int *jlist, *numneigh, **firstneigh; tagint tag1, tag2; + NeighList *list; int nlocal = atom->nlocal; - tagint *tag = atom->tag; - NeighList *list = force->pair->list; // may need to be generalized for pair hybrid* - numneigh = list->numneigh; - firstneigh = list->firstneigh; // In theory could communicate a list of broken bonds to neighboring processors here // to remove restriction that users use Newton bond off - for (auto const &it : new_broken_pairs) { - tag1 = it.first; - tag2 = it.second; - i1 = atom->map(tag1); - i2 = atom->map(tag2); + for (int ilist = 0; ilist < neighbor->nlist; ilist++) { + list = neighbor->lists[ilist]; - // Loop through atoms of owned atoms i j - if (i1 < nlocal) { - jlist = firstneigh[i1]; - jnum = numneigh[i1]; - for (jj = 0; jj < jnum; jj++) { - j = jlist[jj]; - j &= SPECIALMASK; // Clear special 
bond bits - if (tag[j] == tag2) jlist[jj] = j; + // Skip copied lists, will update original + if (list->copy) continue; + + numneigh = list->numneigh; + firstneigh = list->firstneigh; + + for (auto const &it : new_broken_pairs) { + tag1 = it.first; + tag2 = it.second; + i1 = atom->map(tag1); + i2 = atom->map(tag2); + + // Loop through atoms of owned atoms i j + if (i1 < nlocal) { + jlist = firstneigh[i1]; + jnum = numneigh[i1]; + for (jj = 0; jj < jnum; jj++) { + j = jlist[jj]; + j &= SPECIALMASK; // Clear special bond bits + if (tag[j] == tag2) jlist[jj] = j; + } } - } - if (i2 < nlocal) { - jlist = firstneigh[i2]; - jnum = numneigh[i2]; - for (jj = 0; jj < jnum; jj++) { - j = jlist[jj]; - j &= SPECIALMASK; // Clear special bond bits - if (tag[j] == tag1) jlist[jj] = j; + if (i2 < nlocal) { + jlist = firstneigh[i2]; + jnum = numneigh[i2]; + for (jj = 0; jj < jnum; jj++) { + j = jlist[jj]; + j &= SPECIALMASK; // Clear special bond bits + if (tag[j] == tag1) jlist[jj] = j; + } } } } - for (auto const &it : new_created_pairs) { - tag1 = it.first; - tag2 = it.second; - i1 = atom->map(tag1); - i2 = atom->map(tag2); + for (int ilist = 0; ilist < neighbor->nlist; ilist++) { + list = neighbor->lists[ilist]; - // Loop through atoms of owned atoms i j and update SB bits - if (i1 < nlocal) { - jlist = firstneigh[i1]; - jnum = numneigh[i1]; - for (jj = 0; jj < jnum; jj++) { - j = jlist[jj]; - if (((j >> SBBITS) & 3) != 0) continue; // Skip bonded pairs - if (tag[j] == tag2) jlist[jj] = j ^ (1 << SBBITS); // Add 1-2 special bond bits + // Skip copied lists, will update original + if (list->copy) continue; + + numneigh = list->numneigh; + firstneigh = list->firstneigh; + + for (auto const &it : new_created_pairs) { + tag1 = it.first; + tag2 = it.second; + i1 = atom->map(tag1); + i2 = atom->map(tag2); + + // Loop through atoms of owned atoms i j and update SB bits + if (i1 < nlocal) { + jlist = firstneigh[i1]; + jnum = numneigh[i1]; + for (jj = 0; jj < jnum; jj++) { + j = 
jlist[jj]; + if (((j >> SBBITS) & 3) != 0) continue; // Skip bonded pairs + if (tag[j] == tag2) jlist[jj] = j ^ (1 << SBBITS); // Add 1-2 special bond bits + } } - } - if (i2 < nlocal) { - jlist = firstneigh[i2]; - jnum = numneigh[i2]; - for (jj = 0; jj < jnum; jj++) { - j = jlist[jj]; - if (((j >> SBBITS) & 3) != 0) continue; // Skip bonded pairs - if (tag[j] == tag1) jlist[jj] = j ^ (1 << SBBITS); // Add 1-2 special bond bits + if (i2 < nlocal) { + jlist = firstneigh[i2]; + jnum = numneigh[i2]; + for (jj = 0; jj < jnum; jj++) { + j = jlist[jj]; + if (((j >> SBBITS) & 3) != 0) continue; // Skip bonded pairs + if (tag[j] == tag1) jlist[jj] = j ^ (1 << SBBITS); // Add 1-2 special bond bits + } } } } diff --git a/src/fix_update_special_bonds.h b/src/BPM/fix_update_special_bonds.h similarity index 100% rename from src/fix_update_special_bonds.h rename to src/BPM/fix_update_special_bonds.h diff --git a/src/BPM/pair_bpm_spring.cpp b/src/BPM/pair_bpm_spring.cpp index 1177156359..01cee91b4c 100644 --- a/src/BPM/pair_bpm_spring.cpp +++ b/src/BPM/pair_bpm_spring.cpp @@ -19,6 +19,7 @@ #include "force.h" #include "memory.h" #include "neigh_list.h" +#include "neighbor.h" #include @@ -202,6 +203,18 @@ void PairBPMSpring::coeff(int narg, char **arg) if (count == 0) error->all(FLERR, "Incorrect args for pair coefficients"); } +/* ---------------------------------------------------------------------- + init specific to this pair style +------------------------------------------------------------------------- */ + +void PairBPMSpring::init_style() +{ + if (comm->ghost_velocity == 0) + error->all(FLERR,"Pair bpm/spring requires ghost atoms store velocity"); + + neighbor->add_request(this); +} + /* ---------------------------------------------------------------------- init for one type pair i,j and corresponding j,i ------------------------------------------------------------------------- */ diff --git a/src/BPM/pair_bpm_spring.h b/src/BPM/pair_bpm_spring.h index 
3cb281bff3..c10e4a3400 100644 --- a/src/BPM/pair_bpm_spring.h +++ b/src/BPM/pair_bpm_spring.h @@ -31,6 +31,7 @@ class PairBPMSpring : public Pair { void compute(int, int) override; void settings(int, char **) override; void coeff(int, char **) override; + void init_style() override; double init_one(int, int) override; void write_restart(FILE *) override; void read_restart(FILE *) override; diff --git a/src/Depend.sh b/src/Depend.sh index dbffb2dba0..3df1347e67 100755 --- a/src/Depend.sh +++ b/src/Depend.sh @@ -99,6 +99,7 @@ fi if (test $1 = "EXTRA-PAIR") then depend GPU + depend KOKKOS depend OPENMP fi diff --git a/src/EFF/fix_langevin_eff.cpp b/src/EFF/fix_langevin_eff.cpp index 8c255e4348..a25b6ac837 100644 --- a/src/EFF/fix_langevin_eff.cpp +++ b/src/EFF/fix_langevin_eff.cpp @@ -137,7 +137,7 @@ void FixLangevinEff::post_force_no_tally() dof = domain->dimension * particles; fix_dof = 0; for (int i = 0; i < modify->nfix; i++) - fix_dof += modify->fix[i]->dof(igroup); + fix_dof += (int)modify->fix[i]->dof(igroup); // extra_dof = domain->dimension dof -= domain->dimension + fix_dof; @@ -306,7 +306,7 @@ void FixLangevinEff::post_force_tally() dof = domain->dimension * particles; fix_dof = 0; for (int i = 0; i < modify->nfix; i++) - fix_dof += modify->fix[i]->dof(igroup); + fix_dof += (int)modify->fix[i]->dof(igroup); // extra_dof = domain->dimension dof -= domain->dimension + fix_dof; diff --git a/src/ELECTRODE/pppm_electrode.cpp b/src/ELECTRODE/pppm_electrode.cpp index 6ede0f1f4d..0ae3da6863 100644 --- a/src/ELECTRODE/pppm_electrode.cpp +++ b/src/ELECTRODE/pppm_electrode.cpp @@ -633,7 +633,9 @@ void PPPMElectrode::project_psi(double *vec, int sensor_grpbit) // project u_brick with weight matrix double **x = atom->x; int *mask = atom->mask; - double const scaleinv = 1.0 / (nx_pppm * ny_pppm * nz_pppm); + const bigint ngridtotal = (bigint) nx_pppm * ny_pppm * nz_pppm; + const double scaleinv = 1.0 / ngridtotal; + for (int i = 0; i < atom->nlocal; i++) { if (!(mask[i] 
& sensor_grpbit)) continue; double v = 0.; @@ -1362,7 +1364,7 @@ double PPPMElectrode::compute_qopt() // each proc calculates contributions from every Pth grid point bigint ngridtotal = (bigint) nx_pppm * ny_pppm * nz_pppm; - int nxy_pppm = nx_pppm * ny_pppm; + bigint nxy_pppm = (bigint) nx_pppm * ny_pppm; double qopt = 0.0; diff --git a/src/EXTRA-COMPUTE/compute_rattlers_atom.cpp b/src/EXTRA-COMPUTE/compute_rattlers_atom.cpp index 602923b58a..9dacf14171 100644 --- a/src/EXTRA-COMPUTE/compute_rattlers_atom.cpp +++ b/src/EXTRA-COMPUTE/compute_rattlers_atom.cpp @@ -144,7 +144,6 @@ void ComputeRattlersAtom::compute_peratom() numneigh = list->numneigh; firstneigh = list->firstneigh; - Pair *pair = force->pair; double **cutsq = force->pair->cutsq; int change_flag = 1; diff --git a/src/EXTRA-COMPUTE/compute_rattlers_atom.h b/src/EXTRA-COMPUTE/compute_rattlers_atom.h index 257bae8374..79a0a0a982 100644 --- a/src/EXTRA-COMPUTE/compute_rattlers_atom.h +++ b/src/EXTRA-COMPUTE/compute_rattlers_atom.h @@ -38,8 +38,7 @@ class ComputeRattlersAtom : public Compute { void unpack_reverse_comm(int, int *, double *) override; private: - int pstyle, cutstyle; - int ncontacts_rattler, max_tries, nmax, invoked_peratom; + int cutstyle, ncontacts_rattler, max_tries, nmax, invoked_peratom; int *ncontacts; double *rattler; class NeighList *list; diff --git a/src/EXTRA-COMPUTE/compute_slcsa_atom.cpp b/src/EXTRA-COMPUTE/compute_slcsa_atom.cpp index 509362a73b..6c272938b6 100644 --- a/src/EXTRA-COMPUTE/compute_slcsa_atom.cpp +++ b/src/EXTRA-COMPUTE/compute_slcsa_atom.cpp @@ -33,21 +33,20 @@ #include #include -#include using namespace LAMMPS_NS; static const char cite_compute_slcsa_atom_c[] = - "compute slcsa/atom command: doi:10.1088/0965-0393/21/5/055020\n\n" + "compute slcsa/atom command: doi:10.1016/j.commatsci.2023.112534\n\n" "@Article{Lafourcade2023,\n" " author = {P. Lafourcade and J.-B. Maillet and C. Denoual and E. Duval and A. Allera and A. " "M. Goryaeva and M.-C. 
Marinica},\n" " title = {Robust crystal structure identification at extreme conditions using a " "density-independent spectral descriptor and supervised learning},\n" " journal = {Computational Materials Science},\n" - " year = 2023,\n" - " volume = XX,\n" - " pages = {XXXXXX}\n" + " year = 2023,\n" + " volume = 230,\n" + " pages = 112534\n" "}\n\n"; /* ---------------------------------------------------------------------- */ @@ -79,6 +78,8 @@ ComputeSLCSAAtom::ComputeSLCSAAtom(LAMMPS *lmp, int narg, char **arg) : // # LR bias vector // vector with 1 row x nclasses cols + if (lmp->citeme) lmp->citeme->add(cite_compute_slcsa_atom_c); + if (narg != 11) utils::missing_cmd_args(FLERR, "compute slcsa/atom", error); int twojmax = utils::inumeric(FLERR, arg[3], false, lmp); diff --git a/src/EXTRA-COMPUTE/compute_slcsa_atom.h b/src/EXTRA-COMPUTE/compute_slcsa_atom.h index 6d7cd90c31..ba373a53a8 100644 --- a/src/EXTRA-COMPUTE/compute_slcsa_atom.h +++ b/src/EXTRA-COMPUTE/compute_slcsa_atom.h @@ -53,14 +53,12 @@ class ComputeSLCSAAtom : public Compute { value_t descriptorval; int nmax; int ncols; - int nevery; int ncomps; int nclasses; const char *database_mean_descriptor_file; const char *lda_scalings_file; const char *lr_decision_file; const char *lr_bias_file; - const char *covmat_file; const char *maha_file; class NeighList *list; diff --git a/src/EXTRA-FIX/fix_ave_correlate_long.cpp b/src/EXTRA-FIX/fix_ave_correlate_long.cpp index 7fa57af343..fc1760b353 100644 --- a/src/EXTRA-FIX/fix_ave_correlate_long.cpp +++ b/src/EXTRA-FIX/fix_ave_correlate_long.cpp @@ -503,7 +503,7 @@ void FixAveCorrelateLong::end_of_step() if (overwrite) { bigint fileend = platform::ftell(fp); if ((fileend > 0) && (platform::ftruncate(fp,fileend))) - error->warning(FLERR,"Error while tuncating output: {}", utils::getsyserror()); + error->warning(FLERR,"Error while truncating output: {}", utils::getsyserror()); } } } @@ -728,7 +728,7 @@ double FixAveCorrelateLong::memory_usage() { void 
FixAveCorrelateLong::write_restart(FILE *fp) { if (comm->me == 0) { int nsize = 3*npair*numcorrelators*p + 2*npair*numcorrelators - + numcorrelators*p + 2*numcorrelators + 6; + + numcorrelators*p + 2*numcorrelators + 7; int n=0; double *list; memory->create(list,nsize,"correlator:list"); @@ -736,6 +736,7 @@ void FixAveCorrelateLong::write_restart(FILE *fp) { list[n++] = numcorrelators; list[n++] = p; list[n++] = m; + list[n++] = kmax; list[n++] = last_accumulated_step; for (int i=0; i < npair; i++) for (int j=0; j < numcorrelators; j++) { @@ -771,6 +772,7 @@ void FixAveCorrelateLong::restart(char *buf) int numcorrelatorsin = static_cast (list[n++]); int pin = static_cast(list[n++]); int min = static_cast(list[n++]); + kmax = static_cast(list[n++]); last_accumulated_step = static_cast(list[n++]); if ((npairin!=npair) || (numcorrelatorsin!=numcorrelators) || (pin!=(int)p) || (min!=(int)m)) diff --git a/src/EXTRA-FIX/fix_nonaffine_displacement.cpp b/src/EXTRA-FIX/fix_nonaffine_displacement.cpp index c1de50c41d..a426a8fb55 100644 --- a/src/EXTRA-FIX/fix_nonaffine_displacement.cpp +++ b/src/EXTRA-FIX/fix_nonaffine_displacement.cpp @@ -202,7 +202,7 @@ void FixNonaffineDisplacement::init() // need an occasional half neighbor list if (cut_style == RADIUS) { - auto req = neighbor->add_request(this, NeighConst::REQ_SIZE | NeighConst::REQ_OCCASIONAL); + neighbor->add_request(this, NeighConst::REQ_SIZE | NeighConst::REQ_OCCASIONAL); } else { auto req = neighbor->add_request(this, NeighConst::REQ_OCCASIONAL); if (cut_style == CUSTOM) { @@ -233,7 +233,7 @@ void FixNonaffineDisplacement::init_list(int /*id*/, NeighList *ptr) /* ---------------------------------------------------------------------- */ -void FixNonaffineDisplacement::setup(int vflag) +void FixNonaffineDisplacement::setup(int /*vflag*/) { post_force(0); // Save state if needed before starting the 1st timestep } @@ -285,7 +285,6 @@ void FixNonaffineDisplacement::restart(char *buf) void 
FixNonaffineDisplacement::integrate_velocity() { - int i,n; dtv = update->dt; double **v = atom->v; @@ -306,7 +305,6 @@ void FixNonaffineDisplacement::integrate_velocity() void FixNonaffineDisplacement::save_reference_state() { - int i, n; double **x = atom->x; int *mask = atom->mask; @@ -354,15 +352,14 @@ void FixNonaffineDisplacement::calculate_D2Min() int i, j, k, l, ii, jj, inum, jnum, itype, jtype; double evol, j2, edev; - double r[3], r0[3], rsq, rsq0, radsum, temp[3]; + double r[3], r0[3], rsq, radsum, temp[3]; double X_tmp[3][3], Y_tmp[3][3], F_tmp[3][3], E[3][3]; - double Y_inv[3][3] = {0.0}; // Zero for 2d since not all entries used + double Y_inv[3][3] = {{0.0,0.0,0.0},{0.0,0.0,0.0},{0.0,0.0,0.0}}; // Zero for 2d since not all entries used int *ilist, *jlist, *numneigh, **firstneigh; double **x = atom->x; double **x0 = array_atom; double *radius = atom->radius; - tagint *tag = atom->tag; int *type = atom->type; int *mask = atom->mask; int nlocal = atom->nlocal; diff --git a/src/EXTRA-MOLECULE/angle_cosine_periodic.cpp b/src/EXTRA-MOLECULE/angle_cosine_periodic.cpp index 34a8e9d8e5..245a7b8d58 100644 --- a/src/EXTRA-MOLECULE/angle_cosine_periodic.cpp +++ b/src/EXTRA-MOLECULE/angle_cosine_periodic.cpp @@ -120,7 +120,7 @@ void AngleCosinePeriodic::compute(int eflag, int vflag) tn = 1.0; tn_1 = 1.0; tn_2 = 0.0; - un = 1.0; + un = (m==1) ? 
2.0 : 1.0; un_1 = 2.0; un_2 = 0.0; diff --git a/src/EXTRA-MOLECULE/dihedral_quadratic.cpp b/src/EXTRA-MOLECULE/dihedral_quadratic.cpp index f576e6efdd..a7c0dc3eb1 100644 --- a/src/EXTRA-MOLECULE/dihedral_quadratic.cpp +++ b/src/EXTRA-MOLECULE/dihedral_quadratic.cpp @@ -338,7 +338,7 @@ void DihedralQuadratic::born_matrix(int nd, int i1, int i2, int i3, int i4, double sb1,sb3,rb1,rb3,c0,b1mag2,b1mag,b2mag2; double b2mag,b3mag2,b3mag,ctmp,r12c1,c1mag,r12c2; double c2mag,sc1,sc2,s12,c; - double s1,s2,cx,cy,cz,cmag,dx,phi,si,siinv,sin2; + double cx,cy,cz,cmag,dx,phi,si,siinv,sin2; int **dihedrallist = neighbor->dihedrallist; double **x = atom->x; @@ -405,8 +405,6 @@ void DihedralQuadratic::born_matrix(int nd, int i1, int i2, int i3, int i4, if (sc2 < SMALL) sc2 = SMALL; sc2 = 1.0/sc2; - s1 = sc1 * sc1; - s2 = sc2 * sc2; s12 = sc1 * sc2; c = (c0 + c1mag*c2mag) * s12; diff --git a/src/GPU/pppm_gpu.cpp b/src/GPU/pppm_gpu.cpp index a2c733e7ed..1959f00865 100644 --- a/src/GPU/pppm_gpu.cpp +++ b/src/GPU/pppm_gpu.cpp @@ -405,7 +405,8 @@ void PPPMGPU::poisson_ik() // if requested, compute energy and virial contribution - double scaleinv = 1.0/(nx_pppm*ny_pppm*nz_pppm); + bigint ngridtotal = (bigint) nx_pppm * ny_pppm * nz_pppm; + double scaleinv = 1.0 / ngridtotal; double s2 = scaleinv*scaleinv; if (eflag_global || vflag_global) { diff --git a/src/INTEL/pppm_electrode_intel.cpp b/src/INTEL/pppm_electrode_intel.cpp index 5cb62dc5d2..4d8a0331b8 100644 --- a/src/INTEL/pppm_electrode_intel.cpp +++ b/src/INTEL/pppm_electrode_intel.cpp @@ -420,7 +420,9 @@ void PPPMElectrodeIntel::project_psi(IntelBuffers *buffers, double #endif { int *mask = atom->mask; - const flt_t scaleinv = 1.0 / (nx_pppm * ny_pppm * nz_pppm); + + const bigint ngridtotal = (bigint) nx_pppm * ny_pppm * nz_pppm; + const flt_t scaleinv = 1.0 / ngridtotal; const flt_t lo0 = boxlo[0]; const flt_t lo1 = boxlo[1]; diff --git a/src/KIM/kim_interactions.cpp b/src/KIM/kim_interactions.cpp index 1f4f84e648..ce550bf5da 
100644 --- a/src/KIM/kim_interactions.cpp +++ b/src/KIM/kim_interactions.cpp @@ -70,6 +70,8 @@ #include "modify.h" #include "update.h" +#include "fmt/ranges.h" + #include #include diff --git a/src/KIM/kim_param.cpp b/src/KIM/kim_param.cpp index f72df81989..c50474fe67 100644 --- a/src/KIM/kim_param.cpp +++ b/src/KIM/kim_param.cpp @@ -68,6 +68,8 @@ #include "pair_kim.h" #include "variable.h" +#include "fmt/ranges.h" + #include #include #include diff --git a/src/KOKKOS/Install.sh b/src/KOKKOS/Install.sh index ba0deedb45..112a2e947a 100755 --- a/src/KOKKOS/Install.sh +++ b/src/KOKKOS/Install.sh @@ -106,6 +106,8 @@ action compute_temp_kokkos.cpp action compute_temp_kokkos.h action dihedral_charmm_kokkos.cpp dihedral_charmm.cpp action dihedral_charmm_kokkos.h dihedral_charmm.h +action dihedral_charmmfsw_kokkos.cpp dihedral_charmmfsw.cpp +action dihedral_charmmfsw_kokkos.h dihedral_charmmfsw.h action dihedral_class2_kokkos.cpp dihedral_class2.cpp action dihedral_class2_kokkos.h dihedral_class2.h action dihedral_harmonic_kokkos.cpp dihedral_harmonic.cpp @@ -311,6 +313,8 @@ action pair_lj_charmm_coul_charmm_kokkos.cpp pair_lj_charmm_coul_charmm.cpp action pair_lj_charmm_coul_charmm_kokkos.h pair_lj_charmm_coul_charmm.h action pair_lj_charmm_coul_long_kokkos.cpp pair_lj_charmm_coul_long.cpp action pair_lj_charmm_coul_long_kokkos.h pair_lj_charmm_coul_long.h +action pair_lj_charmmfsw_coul_long_kokkos.cpp pair_lj_charmmfsw_coul_long.cpp +action pair_lj_charmmfsw_coul_long_kokkos.h pair_lj_charmmfsw_coul_long.h action pair_lj_class2_coul_cut_kokkos.cpp pair_lj_class2_coul_cut.cpp action pair_lj_class2_coul_cut_kokkos.h pair_lj_class2_coul_cut.h action pair_lj_class2_coul_long_kokkos.cpp pair_lj_class2_coul_long.cpp diff --git a/src/KOKKOS/atom_kokkos.cpp b/src/KOKKOS/atom_kokkos.cpp index c55c1d315b..501b719ad4 100644 --- a/src/KOKKOS/atom_kokkos.cpp +++ b/src/KOKKOS/atom_kokkos.cpp @@ -31,7 +31,9 @@ using namespace LAMMPS_NS; /* 
---------------------------------------------------------------------- */ -AtomKokkos::AtomKokkos(LAMMPS *lmp) : Atom(lmp) +AtomKokkos::AtomKokkos(LAMMPS *lmp) : Atom(lmp), +mapBinner(1, 0.0, 1.0), // no default constructor, these values are not used +mapSorter(d_tag_sorted, 0, 1, mapBinner, true) { avecKK = nullptr; @@ -300,7 +302,7 @@ void AtomKokkos::grow(unsigned int mask) int AtomKokkos::add_custom(const char *name, int flag, int cols) { - int index; + int index = -1; if (flag == 0 && cols == 0) { index = nivector; diff --git a/src/KOKKOS/atom_kokkos.h b/src/KOKKOS/atom_kokkos.h index 6a3036375d..e6269b5527 100644 --- a/src/KOKKOS/atom_kokkos.h +++ b/src/KOKKOS/atom_kokkos.h @@ -103,7 +103,8 @@ class AtomKokkos : public Atom { using MapKeyViewType = decltype(d_tag_sorted); using BinOpMap = Kokkos::BinOp1D; - Kokkos::BinSort Sorter; + BinOpMap mapBinner; + Kokkos::BinSort mapSorter; class AtomVecKokkos* avecKK; diff --git a/src/KOKKOS/atom_map_kokkos.cpp b/src/KOKKOS/atom_map_kokkos.cpp index 06516e4142..4f46c33dbe 100644 --- a/src/KOKKOS/atom_map_kokkos.cpp +++ b/src/KOKKOS/atom_map_kokkos.cpp @@ -146,7 +146,7 @@ void AtomKokkos::map_set() int nmax = atom->nmax; int realloc_flag = 0; - if (d_tag_sorted.extent(0) < nmax) { + if (!d_tag_sorted.data() || (int)d_tag_sorted.extent(0) < nmax) { MemKK::realloc_kokkos(d_tag_sorted,"atom:tag_sorted",nmax); MemKK::realloc_kokkos(d_i_sorted,"atom:i_sorted",nmax); realloc_flag = 1; @@ -179,25 +179,25 @@ void AtomKokkos::map_set() using MapKeyViewType = decltype(d_tag_sorted); using BinOpMap = Kokkos::BinOp1D; - auto binner = BinOpMap(nall, min, max); + mapBinner = BinOpMap(nall, min, max); - if (!Sorter.bin_offsets.data() || realloc_flag) { - Sorter = Kokkos::BinSort(d_tag_sorted, 0, nall, binner, true); - MemKK::realloc_kokkos(Sorter.bin_count_atomic,"Kokkos::SortImpl::BinSortFunctor::bin_count",nmax+1); - Kokkos::deep_copy(Sorter.bin_count_atomic,0); - Sorter.bin_count_const = Sorter.bin_count_atomic; - 
MemKK::realloc_kokkos(Sorter.bin_offsets,"Kokkos::SortImpl::BinSortFunctor::bin_offsets",nmax+1); - MemKK::realloc_kokkos(Sorter.sort_order,"Kokkos::SortImpl::BinSortFunctor::sort_order",nmax); + if (realloc_flag) { + mapSorter = Kokkos::BinSort(d_tag_sorted, 0, nall, mapBinner, true); + MemKK::realloc_kokkos(mapSorter.bin_count_atomic,"Kokkos::SortImpl::BinSortFunctor::bin_count",nmax+1); + Kokkos::deep_copy(mapSorter.bin_count_atomic,0); + mapSorter.bin_count_const = mapSorter.bin_count_atomic; + MemKK::realloc_kokkos(mapSorter.bin_offsets,"Kokkos::SortImpl::BinSortFunctor::bin_offsets",nmax+1); + MemKK::realloc_kokkos(mapSorter.sort_order,"Kokkos::SortImpl::BinSortFunctor::sort_order",nmax); } else { - Kokkos::deep_copy(Sorter.bin_count_atomic,0); - Sorter.bin_op = binner; - Sorter.range_begin = 0; - Sorter.range_end = nall; + Kokkos::deep_copy(mapSorter.bin_count_atomic,0); + mapSorter.bin_op = mapBinner; + mapSorter.range_begin = 0; + mapSorter.range_end = nall; } - Sorter.create_permute_vector(LMPDeviceType()); - Sorter.sort(LMPDeviceType(), d_tag_sorted, 0, nall); - Sorter.sort(LMPDeviceType(), d_i_sorted, 0, nall); + mapSorter.create_permute_vector(LMPDeviceType()); + mapSorter.sort(LMPDeviceType(), d_tag_sorted, 0, nall); + mapSorter.sort(LMPDeviceType(), d_i_sorted, 0, nall); auto d_map_array = k_map_array.d_view; auto d_map_hash = k_map_hash.d_view; @@ -273,6 +273,7 @@ void AtomKokkos::map_set() error->one(FLERR,"Failed to insert into Kokkos hash atom map"); k_sametag.modify_device(); + k_sametag.sync_host(); if (map_style == MAP_ARRAY) k_map_array.modify_device(); diff --git a/src/KOKKOS/atom_vec_angle_kokkos.cpp b/src/KOKKOS/atom_vec_angle_kokkos.cpp index dd6be164c0..418c2d629d 100644 --- a/src/KOKKOS/atom_vec_angle_kokkos.cpp +++ b/src/KOKKOS/atom_vec_angle_kokkos.cpp @@ -680,7 +680,6 @@ struct AtomVecAngleKokkos_PackExchangeFunctor { const typename AT::tdual_xfloat_2d buf, typename AT::tdual_int_1d sendlist, typename AT::tdual_int_1d copylist): - 
_size_exchange(atom->avecKK->size_exchange), _x(atom->k_x.view()), _v(atom->k_v.view()), _tag(atom->k_tag.view()), @@ -716,7 +715,8 @@ struct AtomVecAngleKokkos_PackExchangeFunctor { _angle_atom2w(atom->k_angle_atom2.view()), _angle_atom3w(atom->k_angle_atom3.view()), _sendlist(sendlist.template view()), - _copylist(copylist.template view()) { + _copylist(copylist.template view()), + _size_exchange(atom->avecKK->size_exchange) { const int maxsendlist = (buf.template view().extent(0)* buf.template view().extent(1))/_size_exchange; buffer_view(_buf,buf,maxsendlist,_size_exchange); @@ -858,7 +858,6 @@ struct AtomVecAngleKokkos_UnpackExchangeFunctor { const typename AT::tdual_xfloat_2d buf, typename AT::tdual_int_1d nlocal, int dim, X_FLOAT lo, X_FLOAT hi): - _size_exchange(atom->avecKK->size_exchange), _x(atom->k_x.view()), _v(atom->k_v.view()), _tag(atom->k_tag.view()), @@ -876,8 +875,8 @@ struct AtomVecAngleKokkos_UnpackExchangeFunctor { _angle_atom1(atom->k_angle_atom1.view()), _angle_atom2(atom->k_angle_atom2.view()), _angle_atom3(atom->k_angle_atom3.view()), - _nlocal(nlocal.template view()),_dim(dim), - _lo(lo),_hi(hi) { + _nlocal(nlocal.template view()), + _dim(dim),_lo(lo),_hi(hi),_size_exchange(atom->avecKK->size_exchange) { const int maxsendlist = (buf.template view().extent(0)* buf.template view().extent(1))/_size_exchange; buffer_view(_buf,buf,maxsendlist,_size_exchange); @@ -927,7 +926,7 @@ struct AtomVecAngleKokkos_UnpackExchangeFunctor { int AtomVecAngleKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, int nrecv, int nlocal, int dim, X_FLOAT lo, X_FLOAT hi, ExecutionSpace space, - DAT::tdual_int_1d &k_indices) + DAT::tdual_int_1d &/*k_indices*/) { while (nlocal + nrecv/size_exchange >= nmax) grow(0); diff --git a/src/KOKKOS/atom_vec_atomic_kokkos.cpp b/src/KOKKOS/atom_vec_atomic_kokkos.cpp index 1ea8377a68..973ad2f7f2 100644 --- a/src/KOKKOS/atom_vec_atomic_kokkos.cpp +++ b/src/KOKKOS/atom_vec_atomic_kokkos.cpp @@ -294,7 +294,6 @@ struct 
AtomVecAtomicKokkos_PackExchangeFunctor { const typename AT::tdual_xfloat_2d buf, typename AT::tdual_int_1d sendlist, typename AT::tdual_int_1d copylist): - _size_exchange(atom->avecKK->size_exchange), _x(atom->k_x.view()), _v(atom->k_v.view()), _tag(atom->k_tag.view()), @@ -308,7 +307,8 @@ struct AtomVecAtomicKokkos_PackExchangeFunctor { _maskw(atom->k_mask.view()), _imagew(atom->k_image.view()), _sendlist(sendlist.template view()), - _copylist(copylist.template view()) { + _copylist(copylist.template view()), + _size_exchange(atom->avecKK->size_exchange) { const int maxsendlist = (buf.template view().extent(0)*buf.template view().extent(1))/_size_exchange; buffer_view(_buf,buf,maxsendlist,_size_exchange); @@ -392,16 +392,15 @@ struct AtomVecAtomicKokkos_UnpackExchangeFunctor { typename AT::tdual_int_1d nlocal, typename AT::tdual_int_1d indices, int dim, X_FLOAT lo, X_FLOAT hi): - _size_exchange(atom->avecKK->size_exchange), _x(atom->k_x.view()), _v(atom->k_v.view()), _tag(atom->k_tag.view()), _type(atom->k_type.view()), _mask(atom->k_mask.view()), _image(atom->k_image.view()), - _indices(indices.template view()), - _nlocal(nlocal.template view()),_dim(dim), - _lo(lo),_hi(hi) { + _nlocal(nlocal.template view()), + _indices(indices.template view()),_dim(dim), + _lo(lo),_hi(hi),_size_exchange(atom->avecKK->size_exchange) { const int maxsendlist = (buf.template view().extent(0)* buf.template view().extent(1))/_size_exchange; buffer_view(_buf,buf,maxsendlist,_size_exchange); diff --git a/src/KOKKOS/atom_vec_bond_kokkos.cpp b/src/KOKKOS/atom_vec_bond_kokkos.cpp index c45bdedf38..a4fd9ca1b5 100644 --- a/src/KOKKOS/atom_vec_bond_kokkos.cpp +++ b/src/KOKKOS/atom_vec_bond_kokkos.cpp @@ -352,7 +352,6 @@ struct AtomVecBondKokkos_PackExchangeFunctor { const typename AT::tdual_xfloat_2d buf, typename AT::tdual_int_1d sendlist, typename AT::tdual_int_1d copylist): - _size_exchange(atom->avecKK->size_exchange), _x(atom->k_x.view()), _v(atom->k_v.view()), 
_tag(atom->k_tag.view()), @@ -378,7 +377,8 @@ struct AtomVecBondKokkos_PackExchangeFunctor { _bond_typew(atom->k_bond_type.view()), _bond_atomw(atom->k_bond_atom.view()), _sendlist(sendlist.template view()), - _copylist(copylist.template view()) { + _copylist(copylist.template view()), + _size_exchange(atom->avecKK->size_exchange) { const int maxsendlist = (buf.template view().extent(0)* buf.template view().extent(1))/_size_exchange; buffer_view(_buf,buf,maxsendlist,_size_exchange); @@ -503,7 +503,6 @@ struct AtomVecBondKokkos_UnpackExchangeFunctor { typename AT::tdual_int_1d nlocal, typename AT::tdual_int_1d indices, int dim, X_FLOAT lo, X_FLOAT hi): - _size_exchange(atom->avecKK->size_exchange), _x(atom->k_x.view()), _v(atom->k_v.view()), _tag(atom->k_tag.view()), @@ -516,9 +515,9 @@ struct AtomVecBondKokkos_UnpackExchangeFunctor { _num_bond(atom->k_num_bond.view()), _bond_type(atom->k_bond_type.view()), _bond_atom(atom->k_bond_atom.view()), + _nlocal(nlocal.template view()), _indices(indices.template view()), - _nlocal(nlocal.template view()),_dim(dim), - _lo(lo),_hi(hi) { + _dim(dim),_lo(lo),_hi(hi),_size_exchange(atom->avecKK->size_exchange) { const int maxsendlist = (buf.template view().extent(0)* buf.template view().extent(1))/_size_exchange; buffer_view(_buf,buf,maxsendlist,_size_exchange); diff --git a/src/KOKKOS/atom_vec_charge_kokkos.cpp b/src/KOKKOS/atom_vec_charge_kokkos.cpp index 22fc63ff91..4fa814f1ac 100644 --- a/src/KOKKOS/atom_vec_charge_kokkos.cpp +++ b/src/KOKKOS/atom_vec_charge_kokkos.cpp @@ -366,7 +366,6 @@ struct AtomVecChargeKokkos_PackExchangeFunctor { const typename AT::tdual_xfloat_2d buf, typename AT::tdual_int_1d sendlist, typename AT::tdual_int_1d copylist): - _size_exchange(atom->avecKK->size_exchange), _x(atom->k_x.view()), _v(atom->k_v.view()), _tag(atom->k_tag.view()), @@ -382,7 +381,8 @@ struct AtomVecChargeKokkos_PackExchangeFunctor { _imagew(atom->k_image.view()), _qw(atom->k_q.view()), _sendlist(sendlist.template view()), - 
_copylist(copylist.template view()) { + _copylist(copylist.template view()), + _size_exchange(atom->avecKK->size_exchange) { const int maxsendlist = (buf.template view().extent(0)* buf.template view().extent(1))/_size_exchange; @@ -474,17 +474,16 @@ struct AtomVecChargeKokkos_UnpackExchangeFunctor { typename AT::tdual_int_1d nlocal, typename AT::tdual_int_1d indices, int dim, X_FLOAT lo, X_FLOAT hi): - _size_exchange(atom->avecKK->size_exchange), _x(atom->k_x.view()), _v(atom->k_v.view()), _tag(atom->k_tag.view()), _type(atom->k_type.view()), _mask(atom->k_mask.view()), _image(atom->k_image.view()), - _indices(indices.template view()), _q(atom->k_q.view()), - _nlocal(nlocal.template view()),_dim(dim), - _lo(lo),_hi(hi) { + _nlocal(nlocal.template view()), + _indices(indices.template view()),_dim(dim), + _lo(lo),_hi(hi),_size_exchange(atom->avecKK->size_exchange) { const int maxsendlist = (buf.template view().extent(0)*buf.template view().extent(1))/_size_exchange; buffer_view(_buf,buf,maxsendlist,_size_exchange); diff --git a/src/KOKKOS/atom_vec_dipole_kokkos.cpp b/src/KOKKOS/atom_vec_dipole_kokkos.cpp index ad06570cdc..ecc0f3b497 100644 --- a/src/KOKKOS/atom_vec_dipole_kokkos.cpp +++ b/src/KOKKOS/atom_vec_dipole_kokkos.cpp @@ -398,7 +398,6 @@ struct AtomVecDipoleKokkos_PackExchangeFunctor { const typename AT::tdual_xfloat_2d buf, typename AT::tdual_int_1d sendlist, typename AT::tdual_int_1d copylist): - _size_exchange(atom->avecKK->size_exchange), _x(atom->k_x.view()), _v(atom->k_v.view()), _tag(atom->k_tag.view()), @@ -416,7 +415,8 @@ struct AtomVecDipoleKokkos_PackExchangeFunctor { _qw(atom->k_q.view()), _muw(atom->k_mu.view()), _sendlist(sendlist.template view()), - _copylist(copylist.template view()) { + _copylist(copylist.template view()), + _size_exchange(atom->avecKK->size_exchange) { const int maxsendlist = (buf.template view().extent(0)* buf.template view().extent(1))/_size_exchange; @@ -515,7 +515,6 @@ struct AtomVecDipoleKokkos_UnpackExchangeFunctor { 
const typename AT::tdual_xfloat_2d buf, typename AT::tdual_int_1d nlocal, int dim, X_FLOAT lo, X_FLOAT hi): - _size_exchange(atom->avecKK->size_exchange), _x(atom->k_x.view()), _v(atom->k_v.view()), _tag(atom->k_tag.view()), @@ -524,8 +523,8 @@ struct AtomVecDipoleKokkos_UnpackExchangeFunctor { _image(atom->k_image.view()), _q(atom->k_q.view()), _mu(atom->k_mu.view()), - _nlocal(nlocal.template view()),_dim(dim), - _lo(lo),_hi(hi) { + _nlocal(nlocal.template view()), + _dim(dim),_lo(lo),_hi(hi),_size_exchange(atom->avecKK->size_exchange) { const int maxsendlist = (buf.template view().extent(0)*buf.template view().extent(1))/_size_exchange; buffer_view(_buf,buf,maxsendlist,_size_exchange); @@ -557,8 +556,8 @@ struct AtomVecDipoleKokkos_UnpackExchangeFunctor { /* ---------------------------------------------------------------------- */ int AtomVecDipoleKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, int nrecv, int nlocal, - int dim, X_FLOAT lo, X_FLOAT hi, ExecutionSpace space, - DAT::tdual_int_1d &k_indices) + int dim, X_FLOAT lo, X_FLOAT hi, ExecutionSpace space, + DAT::tdual_int_1d &/*k_indices*/) { if (space == Host) { k_count.h_view(0) = nlocal; diff --git a/src/KOKKOS/atom_vec_dpd_kokkos.cpp b/src/KOKKOS/atom_vec_dpd_kokkos.cpp index c3430b9f6e..70aedcc931 100644 --- a/src/KOKKOS/atom_vec_dpd_kokkos.cpp +++ b/src/KOKKOS/atom_vec_dpd_kokkos.cpp @@ -746,7 +746,6 @@ struct AtomVecDPDKokkos_PackExchangeFunctor { const typename AT::tdual_xfloat_2d buf, typename AT::tdual_int_1d sendlist, typename AT::tdual_int_1d copylist): - _size_exchange(atom->avecKK->size_exchange), _x(atom->k_x.view()), _v(atom->k_v.view()), _tag(atom->k_tag.view()), @@ -772,7 +771,8 @@ struct AtomVecDPDKokkos_PackExchangeFunctor { _uCGw(atom->k_uCG.view()), _uCGneww(atom->k_uCGnew.view()), _sendlist(sendlist.template view()), - _copylist(copylist.template view()) { + _copylist(copylist.template view()), + _size_exchange(atom->avecKK->size_exchange) { const int maxsendlist = 
(buf.template view().extent(0)*buf.template view().extent(1))/_size_exchange; buffer_view(_buf,buf,maxsendlist,_size_exchange); @@ -875,15 +875,14 @@ struct AtomVecDPDKokkos_UnpackExchangeFunctor { const typename AT::tdual_xfloat_2d buf, typename AT::tdual_int_1d nlocal, int dim, X_FLOAT lo, X_FLOAT hi): - _size_exchange(atom->avecKK->size_exchange), _x(atom->k_x.view()), _v(atom->k_v.view()), _tag(atom->k_tag.view()), _type(atom->k_type.view()), _mask(atom->k_mask.view()), _image(atom->k_image.view()), - _nlocal(nlocal.template view()),_dim(dim), - _lo(lo),_hi(hi) { + _nlocal(nlocal.template view()), + _dim(dim),_lo(lo),_hi(hi),_size_exchange(atom->avecKK->size_exchange) { const int maxsendlist = (buf.template view().extent(0)*buf.template view().extent(1))/_size_exchange; buffer_view(_buf,buf,maxsendlist,_size_exchange); @@ -917,7 +916,7 @@ struct AtomVecDPDKokkos_UnpackExchangeFunctor { /* ---------------------------------------------------------------------- */ int AtomVecDPDKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, int nrecv, int nlocal, int dim, X_FLOAT lo, X_FLOAT hi, ExecutionSpace space, - DAT::tdual_int_1d &k_indices) + DAT::tdual_int_1d &/*k_indices*/) { while (nlocal + nrecv/size_exchange >= nmax) grow(0); diff --git a/src/KOKKOS/atom_vec_full_kokkos.cpp b/src/KOKKOS/atom_vec_full_kokkos.cpp index 829ebc75e6..732078a627 100644 --- a/src/KOKKOS/atom_vec_full_kokkos.cpp +++ b/src/KOKKOS/atom_vec_full_kokkos.cpp @@ -501,7 +501,6 @@ struct AtomVecFullKokkos_PackExchangeFunctor { const typename AT::tdual_xfloat_2d buf, typename AT::tdual_int_1d sendlist, typename AT::tdual_int_1d copylist): - _size_exchange(atom->avecKK->size_exchange), _x(atom->k_x.view()), _v(atom->k_v.view()), _tag(atom->k_tag.view()), @@ -563,7 +562,8 @@ struct AtomVecFullKokkos_PackExchangeFunctor { _improper_atom3w(atom->k_improper_atom3.view()), _improper_atom4w(atom->k_improper_atom4.view()), _sendlist(sendlist.template view()), - _copylist(copylist.template view()) 
{ + _copylist(copylist.template view()), + _size_exchange(atom->avecKK->size_exchange) { const int maxsendlist = (buf.template view().extent(0)* buf.template view().extent(1))/_size_exchange; buffer_view(_buf,buf,maxsendlist,_size_exchange); @@ -755,14 +755,12 @@ struct AtomVecFullKokkos_UnpackExchangeFunctor { typename AT::tdual_int_1d nlocal, typename AT::tdual_int_1d indices, int dim, X_FLOAT lo, X_FLOAT hi): - _size_exchange(atom->avecKK->size_exchange), _x(atom->k_x.view()), _v(atom->k_v.view()), _tag(atom->k_tag.view()), _type(atom->k_type.view()), _mask(atom->k_mask.view()), _image(atom->k_image.view()), - _indices(indices.template view()), _q(atom->k_q.view()), _molecule(atom->k_molecule.view()), _nspecial(atom->k_nspecial.view()), @@ -787,9 +785,9 @@ struct AtomVecFullKokkos_UnpackExchangeFunctor { _improper_atom2(atom->k_improper_atom2.view()), _improper_atom3(atom->k_improper_atom3.view()), _improper_atom4(atom->k_improper_atom4.view()), - _nlocal(nlocal.template view()),_dim(dim), - _lo(lo),_hi(hi) { - + _nlocal(nlocal.template view()), + _indices(indices.template view()), + _dim(dim),_lo(lo),_hi(hi),_size_exchange(atom->avecKK->size_exchange) { const int maxsendlist = (buf.template view().extent(0)* buf.template view().extent(1))/_size_exchange; buffer_view(_buf,buf,maxsendlist,_size_exchange); diff --git a/src/KOKKOS/atom_vec_hybrid_kokkos.cpp b/src/KOKKOS/atom_vec_hybrid_kokkos.cpp index 4e01ab5794..08bcaaef74 100644 --- a/src/KOKKOS/atom_vec_hybrid_kokkos.cpp +++ b/src/KOKKOS/atom_vec_hybrid_kokkos.cpp @@ -66,7 +66,7 @@ void AtomVecHybridKokkos::sort_kokkos(Kokkos::BinSort &Sorte int AtomVecHybridKokkos::pack_comm_kokkos(const int &/*n*/, const DAT::tdual_int_2d &/*k_sendlist*/, const int & /*iswap*/, const DAT::tdual_xfloat_2d &/*buf*/, - const int &/*pbc_flag*/, const int pbc[]) + const int &/*pbc_flag*/, const int /*pbc*/[]) { error->all(FLERR,"AtomVecHybridKokkos doesn't yet support threaded comm"); return 0; @@ -80,7 +80,7 @@ void 
AtomVecHybridKokkos::unpack_comm_kokkos(const int &/*n*/, const int &/*nfir int AtomVecHybridKokkos::pack_comm_self(const int &/*n*/, const DAT::tdual_int_2d &/*list*/, const int & /*iswap*/, const int /*nfirst*/, - const int &/*pbc_flag*/, const int pbc[]) + const int &/*pbc_flag*/, const int /*pbc*/[]) { error->all(FLERR,"AtomVecHybridKokkos doesn't yet support threaded comm"); return 0; @@ -113,7 +113,7 @@ int AtomVecHybridKokkos::pack_exchange_kokkos(const int &/*nsend*/,DAT::tdual_xf int AtomVecHybridKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d & /*k_buf*/, int /*nrecv*/, int /*nlocal*/, int /*dim*/, X_FLOAT /*lo*/, X_FLOAT /*hi*/, ExecutionSpace /*space*/, - DAT::tdual_int_1d &k_indices) + DAT::tdual_int_1d &/*k_indices*/) { error->all(FLERR,"AtomVecHybridKokkos doesn't yet support threaded comm"); return 0; diff --git a/src/KOKKOS/atom_vec_molecular_kokkos.cpp b/src/KOKKOS/atom_vec_molecular_kokkos.cpp index 471dd0ad58..ec98ff9239 100644 --- a/src/KOKKOS/atom_vec_molecular_kokkos.cpp +++ b/src/KOKKOS/atom_vec_molecular_kokkos.cpp @@ -762,7 +762,6 @@ struct AtomVecMolecularKokkos_PackExchangeFunctor { const typename AT::tdual_xfloat_2d buf, typename AT::tdual_int_1d sendlist, typename AT::tdual_int_1d copylist): - _size_exchange(atom->avecKK->size_exchange), _x(atom->k_x.view()), _v(atom->k_v.view()), _tag(atom->k_tag.view()), @@ -822,7 +821,8 @@ struct AtomVecMolecularKokkos_PackExchangeFunctor { _improper_atom3w(atom->k_improper_atom3.view()), _improper_atom4w(atom->k_improper_atom4.view()), _sendlist(sendlist.template view()), - _copylist(copylist.template view()) { + _copylist(copylist.template view()), + _size_exchange(atom->avecKK->size_exchange) { const int maxsendlist = (buf.template view().extent(0)* buf.template view().extent(1))/_size_exchange; buffer_view(_buf,buf,maxsendlist,_size_exchange); @@ -1010,7 +1010,6 @@ struct AtomVecMolecularKokkos_UnpackExchangeFunctor { typename AT::tdual_int_1d nlocal, typename AT::tdual_int_1d indices, int 
dim, X_FLOAT lo, X_FLOAT hi): - _size_exchange(atom->avecKK->size_exchange), _x(atom->k_x.view()), _v(atom->k_v.view()), _tag(atom->k_tag.view()), @@ -1040,9 +1039,9 @@ struct AtomVecMolecularKokkos_UnpackExchangeFunctor { _improper_atom2(atom->k_improper_atom2.view()), _improper_atom3(atom->k_improper_atom3.view()), _improper_atom4(atom->k_improper_atom4.view()), + _nlocal(nlocal.template view()), _indices(indices.template view()), - _nlocal(nlocal.template view()),_dim(dim), - _lo(lo),_hi(hi) { + _dim(dim),_lo(lo),_hi(hi),_size_exchange(atom->avecKK->size_exchange) { const int maxsendlist = (buf.template view().extent(0)* buf.template view().extent(1))/_size_exchange; buffer_view(_buf,buf,maxsendlist,_size_exchange); diff --git a/src/KOKKOS/atom_vec_sphere_kokkos.cpp b/src/KOKKOS/atom_vec_sphere_kokkos.cpp index 5a1c2beee3..3dfb5143cd 100644 --- a/src/KOKKOS/atom_vec_sphere_kokkos.cpp +++ b/src/KOKKOS/atom_vec_sphere_kokkos.cpp @@ -1448,7 +1448,6 @@ struct AtomVecSphereKokkos_PackExchangeFunctor { const typename AT::tdual_xfloat_2d buf, typename AT::tdual_int_1d sendlist, typename AT::tdual_int_1d copylist): - _size_exchange(atom->avecKK->size_exchange), _x(atom->k_x.view()), _v(atom->k_v.view()), _tag(atom->k_tag.view()), @@ -1468,7 +1467,8 @@ struct AtomVecSphereKokkos_PackExchangeFunctor { _rmassw(atom->k_rmass.view()), _omegaw(atom->k_omega.view()), _sendlist(sendlist.template view()), - _copylist(copylist.template view()) { + _copylist(copylist.template view()), + _size_exchange(atom->avecKK->size_exchange) { const int maxsend = (buf.template view().extent(0)*buf.template view().extent(1))/_size_exchange; _buf = typename AT::t_xfloat_2d_um(buf.template view().data(),maxsend,_size_exchange); @@ -1572,7 +1572,6 @@ struct AtomVecSphereKokkos_UnpackExchangeFunctor { typename AT::tdual_int_1d nlocal, typename AT::tdual_int_1d indices, int dim, X_FLOAT lo, X_FLOAT hi): - _size_exchange(atom->avecKK->size_exchange), _x(atom->k_x.view()), _v(atom->k_v.view()), 
_tag(atom->k_tag.view()), @@ -1584,9 +1583,7 @@ struct AtomVecSphereKokkos_UnpackExchangeFunctor { _omega(atom->k_omega.view()), _nlocal(nlocal.template view()), _indices(indices.template view()), - _dim(dim), - _lo(lo),_hi(hi) - { + _dim(dim),_lo(lo),_hi(hi),_size_exchange(atom->avecKK->size_exchange) { const size_t size_exchange = 16; const int maxsendlist = (buf.template view().extent(0)*buf.template view().extent(1))/size_exchange; diff --git a/src/KOKKOS/atom_vec_spin_kokkos.cpp b/src/KOKKOS/atom_vec_spin_kokkos.cpp index d2dd3a05ab..72d38a731e 100644 --- a/src/KOKKOS/atom_vec_spin_kokkos.cpp +++ b/src/KOKKOS/atom_vec_spin_kokkos.cpp @@ -410,7 +410,6 @@ struct AtomVecSpinKokkos_PackExchangeFunctor { const typename AT::tdual_xfloat_2d buf, typename AT::tdual_int_1d sendlist, typename AT::tdual_int_1d copylist): - _size_exchange(atom->avecKK->size_exchange), _x(atom->k_x.view()), _v(atom->k_v.view()), _tag(atom->k_tag.view()), @@ -426,7 +425,8 @@ struct AtomVecSpinKokkos_PackExchangeFunctor { _imagew(atom->k_image.view()), _spw(atom->k_sp.view()), _sendlist(sendlist.template view()), - _copylist(copylist.template view()) { + _copylist(copylist.template view()), + _size_exchange(atom->avecKK->size_exchange) { const int maxsendlist = (buf.template view().extent(0)* buf.template view().extent(1))/_size_exchange; buffer_view(_buf,buf,maxsendlist,_size_exchange); @@ -521,7 +521,6 @@ struct AtomVecSpinKokkos_UnpackExchangeFunctor { const typename AT::tdual_xfloat_2d buf, typename AT::tdual_int_1d nlocal, int dim, X_FLOAT lo, X_FLOAT hi): - _size_exchange(atom->avecKK->size_exchange), _x(atom->k_x.view()), _v(atom->k_v.view()), _tag(atom->k_tag.view()), @@ -529,8 +528,8 @@ struct AtomVecSpinKokkos_UnpackExchangeFunctor { _mask(atom->k_mask.view()), _image(atom->k_image.view()), _sp(atom->k_sp.view()), - _nlocal(nlocal.template view()),_dim(dim), - _lo(lo),_hi(hi) { + _nlocal(nlocal.template view()), + 
_dim(dim),_lo(lo),_hi(hi),_size_exchange(atom->avecKK->size_exchange) { const int maxsendlist = (buf.template view().extent(0)*buf.template view().extent(1))/_size_exchange; buffer_view(_buf,buf,maxsendlist,_size_exchange); @@ -563,7 +562,7 @@ struct AtomVecSpinKokkos_UnpackExchangeFunctor { int AtomVecSpinKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, int nrecv, int nlocal, int dim, X_FLOAT lo, X_FLOAT hi, ExecutionSpace space, - DAT::tdual_int_1d &k_indices) + DAT::tdual_int_1d &/*k_indices*/) { while (nlocal + nrecv/size_exchange >= nmax) grow(0); @@ -592,7 +591,7 @@ int AtomVecSpinKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, int n include f b/c this is invoked from within SPIN pair styles ------------------------------------------------------------------------- */ -void AtomVecSpinKokkos::force_clear(int n, size_t nbytes) +void AtomVecSpinKokkos::force_clear(int /*n*/, size_t nbytes) { int nzero = (double)nbytes/sizeof(double); diff --git a/src/KOKKOS/comm_kokkos.cpp b/src/KOKKOS/comm_kokkos.cpp index b586dca7a5..2f1818e47f 100644 --- a/src/KOKKOS/comm_kokkos.cpp +++ b/src/KOKKOS/comm_kokkos.cpp @@ -864,7 +864,7 @@ void CommKokkos::exchange_device() if (nrecv) { if (atom->nextra_grow) { - if (k_indices.extent(0) < nrecv/data_size) + if ((int) k_indices.extent(0) < nrecv/data_size) MemoryKokkos::realloc_kokkos(k_indices,"comm:indices",nrecv/data_size); } else if (k_indices.h_view.data()) k_indices = DAT::tdual_int_1d(); @@ -931,6 +931,7 @@ void CommKokkos::exchange_device() if (nextrarecv) { kkbase->unpack_exchange_kokkos( k_buf_recv,k_indices,nrecv/data_size, + nrecv1/data_size,nextrarecv1, ExecutionSpaceFromDevice::space); DeviceType().fence(); } diff --git a/src/KOKKOS/compute_reaxff_atom_kokkos.cpp b/src/KOKKOS/compute_reaxff_atom_kokkos.cpp index 8dbcb9441e..3f6c9242d4 100644 --- a/src/KOKKOS/compute_reaxff_atom_kokkos.cpp +++ b/src/KOKKOS/compute_reaxff_atom_kokkos.cpp @@ -87,7 +87,7 @@ void 
ComputeReaxFFAtomKokkos::compute_bonds() nbuf = ((store_bonds ? maxnumbonds*2 : 0) + 3)*nlocal; - if (!buf || k_buf.extent(0) < nbuf) { + if (!buf || ((int)k_buf.extent(0) < nbuf)) { memoryKK->destroy_kokkos(k_buf, buf); memoryKK->create_kokkos(k_buf, buf, nbuf, "reaxff/atom:buf"); } diff --git a/src/KOKKOS/dihedral_charmmfsw_kokkos.cpp b/src/KOKKOS/dihedral_charmmfsw_kokkos.cpp new file mode 100644 index 0000000000..1caea90a74 --- /dev/null +++ b/src/KOKKOS/dihedral_charmmfsw_kokkos.cpp @@ -0,0 +1,815 @@ +// clang-format off +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + https://www.lammps.org/, Sandia National Laboratories + LAMMPS development team: developers@lammps.org + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + + Contributing author: Mitch Murphy (alphataubio) + + Based on serial dihedral_charmmfsw.cpp lj-fsw sections (force-switched) + provided by Robert Meissner and Lucio Colombi Ciacchi of Bremen + University, Germany, with additional assistance from + Robert A. Latour, Clemson University. 
+ +------------------------------------------------------------------------- */ + +#include "dihedral_charmmfsw_kokkos.h" + +#include "atom_kokkos.h" +#include "atom_masks.h" +#include "error.h" +#include "force.h" +#include "kokkos.h" +#include "math_const.h" +#include "memory_kokkos.h" +#include "neighbor_kokkos.h" +#include "pair.h" + +#include + +using namespace LAMMPS_NS; +using namespace MathConst; + +#define TOLERANCE 0.05 + +/* ---------------------------------------------------------------------- */ + +template +DihedralCharmmfswKokkos::DihedralCharmmfswKokkos(LAMMPS *lmp) : DihedralCharmmfsw(lmp) +{ + atomKK = (AtomKokkos *) atom; + neighborKK = (NeighborKokkos *) neighbor; + execution_space = ExecutionSpaceFromDevice::space; + datamask_read = X_MASK | F_MASK | Q_MASK | ENERGY_MASK | VIRIAL_MASK | TYPE_MASK; + datamask_modify = F_MASK | ENERGY_MASK | VIRIAL_MASK; + + k_warning_flag = Kokkos::DualView("Dihedral:warning_flag"); + d_warning_flag = k_warning_flag.template view(); + h_warning_flag = k_warning_flag.h_view; + + centroidstressflag = CENTROID_NOTAVAIL; +} + +/* ---------------------------------------------------------------------- */ + +template +DihedralCharmmfswKokkos::~DihedralCharmmfswKokkos() +{ + if (!copymode) { + memoryKK->destroy_kokkos(k_eatom,eatom); + memoryKK->destroy_kokkos(k_vatom,vatom); + } +} + +/* ---------------------------------------------------------------------- */ + +template +void DihedralCharmmfswKokkos::compute(int eflag_in, int vflag_in) +{ + eflag = eflag_in; + vflag = vflag_in; + + if (lmp->kokkos->neighflag == FULL) + error->all(FLERR,"Dihedral_style charmm/kk requires half neighbor list"); + + ev_init(eflag,vflag,0); + + // ensure pair->ev_tally() will use 1-4 virial contribution + + if (weightflag && vflag_global == VIRIAL_FDOTR) + force->pair->vflag_either = force->pair->vflag_global = 1; + + // reallocate per-atom arrays if necessary + + if (eflag_atom) { + //if(k_eatom.extent(0)destroy_kokkos(k_eatom,eatom); + 
memoryKK->create_kokkos(k_eatom,eatom,maxeatom,"dihedral:eatom"); + d_eatom = k_eatom.template view(); + k_eatom_pair = Kokkos::DualView("dihedral:eatom_pair",maxeatom); + d_eatom_pair = k_eatom_pair.template view(); + //} + } + if (vflag_atom) { + //if(k_vatom.extent(0)destroy_kokkos(k_vatom,vatom); + memoryKK->create_kokkos(k_vatom,vatom,maxvatom,"dihedral:vatom"); + d_vatom = k_vatom.template view(); + k_vatom_pair = Kokkos::DualView("dihedral:vatom_pair",maxvatom); + d_vatom_pair = k_vatom_pair.template view(); + //} + } + + x = atomKK->k_x.view(); + f = atomKK->k_f.view(); + q = atomKK->k_q.view(); + atomtype = atomKK->k_type.view(); + neighborKK->k_dihedrallist.template sync(); + dihedrallist = neighborKK->k_dihedrallist.view(); + int ndihedrallist = neighborKK->ndihedrallist; + nlocal = atom->nlocal; + newton_bond = force->newton_bond; + qqrd2e = force->qqrd2e; + + h_warning_flag() = 0; + k_warning_flag.template modify(); + k_warning_flag.template sync(); + + copymode = 1; + + // loop over neighbors of my atoms + + EVM_FLOAT evm; + + if (evflag) { + if (newton_bond) { + Kokkos::parallel_reduce(Kokkos::RangePolicy >(0,ndihedrallist),*this,evm); + } else { + Kokkos::parallel_reduce(Kokkos::RangePolicy >(0,ndihedrallist),*this,evm); + } + } else { + if (newton_bond) { + Kokkos::parallel_for(Kokkos::RangePolicy >(0,ndihedrallist),*this); + } else { + Kokkos::parallel_for(Kokkos::RangePolicy >(0,ndihedrallist),*this); + } + } + + // error check + + k_warning_flag.template modify(); + k_warning_flag.template sync(); + if (h_warning_flag()) + error->warning(FLERR,"Dihedral problem"); + + if (eflag_global) { + energy += evm.emol; + force->pair->eng_vdwl += evm.evdwl; + force->pair->eng_coul += evm.ecoul; + } + if (vflag_global) { + virial[0] += evm.v[0]; + virial[1] += evm.v[1]; + virial[2] += evm.v[2]; + virial[3] += evm.v[3]; + virial[4] += evm.v[4]; + virial[5] += evm.v[5]; + + force->pair->virial[0] += evm.vp[0]; + force->pair->virial[1] += evm.vp[1]; + 
force->pair->virial[2] += evm.vp[2]; + force->pair->virial[3] += evm.vp[3]; + force->pair->virial[4] += evm.vp[4]; + force->pair->virial[5] += evm.vp[5]; + } + + // don't yet have dualviews for eatom and vatom in pair_kokkos, + // so need to manually copy these to pair style + + int n = nlocal; + if (newton_bond) n += atom->nghost; + + if (eflag_atom) { + k_eatom.template modify(); + k_eatom.template sync(); + + k_eatom_pair.template modify(); + k_eatom_pair.template sync(); + for (int i = 0; i < n; i++) + force->pair->eatom[i] += k_eatom_pair.h_view(i); + } + + if (vflag_atom) { + k_vatom.template modify(); + k_vatom.template sync(); + + k_vatom_pair.template modify(); + k_vatom_pair.template sync(); + for (int i = 0; i < n; i++) { + force->pair->vatom[i][0] += k_vatom_pair.h_view(i,0); + force->pair->vatom[i][1] += k_vatom_pair.h_view(i,1); + force->pair->vatom[i][2] += k_vatom_pair.h_view(i,2); + force->pair->vatom[i][3] += k_vatom_pair.h_view(i,3); + force->pair->vatom[i][4] += k_vatom_pair.h_view(i,4); + force->pair->vatom[i][5] += k_vatom_pair.h_view(i,5); + } + } + + copymode = 0; +} + +template +template +KOKKOS_INLINE_FUNCTION +void DihedralCharmmfswKokkos::operator()(TagDihedralCharmmfswCompute, const int &n, EVM_FLOAT& evm) const { + + // The f array is atomic + Kokkos::View::value,Kokkos::MemoryTraits > a_f = f; + + const int i1 = dihedrallist(n,0); + const int i2 = dihedrallist(n,1); + const int i3 = dihedrallist(n,2); + const int i4 = dihedrallist(n,3); + const int type = dihedrallist(n,4); + + // 1st bond + + const F_FLOAT vb1x = x(i1,0) - x(i2,0); + const F_FLOAT vb1y = x(i1,1) - x(i2,1); + const F_FLOAT vb1z = x(i1,2) - x(i2,2); + + // 2nd bond + + const F_FLOAT vb2x = x(i3,0) - x(i2,0); + const F_FLOAT vb2y = x(i3,1) - x(i2,1); + const F_FLOAT vb2z = x(i3,2) - x(i2,2); + + const F_FLOAT vb2xm = -vb2x; + const F_FLOAT vb2ym = -vb2y; + const F_FLOAT vb2zm = -vb2z; + + // 3rd bond + + const F_FLOAT vb3x = x(i4,0) - x(i3,0); + const F_FLOAT vb3y = 
x(i4,1) - x(i3,1); + const F_FLOAT vb3z = x(i4,2) - x(i3,2); + + const F_FLOAT ax = vb1y*vb2zm - vb1z*vb2ym; + const F_FLOAT ay = vb1z*vb2xm - vb1x*vb2zm; + const F_FLOAT az = vb1x*vb2ym - vb1y*vb2xm; + const F_FLOAT bx = vb3y*vb2zm - vb3z*vb2ym; + const F_FLOAT by = vb3z*vb2xm - vb3x*vb2zm; + const F_FLOAT bz = vb3x*vb2ym - vb3y*vb2xm; + + const F_FLOAT rasq = ax*ax + ay*ay + az*az; + const F_FLOAT rbsq = bx*bx + by*by + bz*bz; + const F_FLOAT rgsq = vb2xm*vb2xm + vb2ym*vb2ym + vb2zm*vb2zm; + const F_FLOAT rg = sqrt(rgsq); + + F_FLOAT rginv,ra2inv,rb2inv; + rginv = ra2inv = rb2inv = 0.0; + if (rg > 0) rginv = 1.0/rg; + if (rasq > 0) ra2inv = 1.0/rasq; + if (rbsq > 0) rb2inv = 1.0/rbsq; + const F_FLOAT rabinv = sqrt(ra2inv*rb2inv); + + F_FLOAT c = (ax*bx + ay*by + az*bz)*rabinv; + F_FLOAT s = rg*rabinv*(ax*vb3x + ay*vb3y + az*vb3z); + + // error check + + if ((c > 1.0 + TOLERANCE || c < (-1.0 - TOLERANCE)) && !d_warning_flag()) + d_warning_flag() = 1; + + if (c > 1.0) c = 1.0; + if (c < -1.0) c = -1.0; + + const int m = d_multiplicity[type]; + F_FLOAT p = 1.0; + F_FLOAT ddf1,df1; + ddf1 = df1 = 0.0; + + for (int i = 0; i < m; i++) { + ddf1 = p*c - df1*s; + df1 = p*s + df1*c; + p = ddf1; + } + + p = p*d_cos_shift[type] + df1*d_sin_shift[type]; + df1 = df1*d_cos_shift[type] - ddf1*d_sin_shift[type]; + df1 *= -m; + p += 1.0; + + if (m == 0) { + p = 1.0 + d_cos_shift[type]; + df1 = 0.0; + } + + E_FLOAT edihedral = 0.0; + if (eflag) edihedral = d_k[type] * p; + + const F_FLOAT fg = vb1x*vb2xm + vb1y*vb2ym + vb1z*vb2zm; + const F_FLOAT hg = vb3x*vb2xm + vb3y*vb2ym + vb3z*vb2zm; + const F_FLOAT fga = fg*ra2inv*rginv; + const F_FLOAT hgb = hg*rb2inv*rginv; + const F_FLOAT gaa = -ra2inv*rg; + const F_FLOAT gbb = rb2inv*rg; + + const F_FLOAT dtfx = gaa*ax; + const F_FLOAT dtfy = gaa*ay; + const F_FLOAT dtfz = gaa*az; + const F_FLOAT dtgx = fga*ax - hgb*bx; + const F_FLOAT dtgy = fga*ay - hgb*by; + const F_FLOAT dtgz = fga*az - hgb*bz; + const F_FLOAT dthx = gbb*bx; + const 
F_FLOAT dthy = gbb*by; + const F_FLOAT dthz = gbb*bz; + + const F_FLOAT df = -d_k[type] * df1; + + const F_FLOAT sx2 = df*dtgx; + const F_FLOAT sy2 = df*dtgy; + const F_FLOAT sz2 = df*dtgz; + + F_FLOAT f1[3],f2[3],f3[3],f4[3]; + f1[0] = df*dtfx; + f1[1] = df*dtfy; + f1[2] = df*dtfz; + + f2[0] = sx2 - f1[0]; + f2[1] = sy2 - f1[1]; + f2[2] = sz2 - f1[2]; + + f4[0] = df*dthx; + f4[1] = df*dthy; + f4[2] = df*dthz; + + f3[0] = -sx2 - f4[0]; + f3[1] = -sy2 - f4[1]; + f3[2] = -sz2 - f4[2]; + + // apply force to each of 4 atoms + + if (NEWTON_BOND || i1 < nlocal) { + a_f(i1,0) += f1[0]; + a_f(i1,1) += f1[1]; + a_f(i1,2) += f1[2]; + } + + if (NEWTON_BOND || i2 < nlocal) { + a_f(i2,0) += f2[0]; + a_f(i2,1) += f2[1]; + a_f(i2,2) += f2[2]; + } + + if (NEWTON_BOND || i3 < nlocal) { + a_f(i3,0) += f3[0]; + a_f(i3,1) += f3[1]; + a_f(i3,2) += f3[2]; + } + + if (NEWTON_BOND || i4 < nlocal) { + a_f(i4,0) += f4[0]; + a_f(i4,1) += f4[1]; + a_f(i4,2) += f4[2]; + } + + if (EVFLAG) + ev_tally(evm,i1,i2,i3,i4,edihedral,f1,f3,f4, + vb1x,vb1y,vb1z,vb2x,vb2y,vb2z,vb3x,vb3y,vb3z); + + // 1-4 LJ and Coulomb interactions + // tally energy/virial in pair, using newton_bond as newton flag + + if (d_weight[type] > 0.0) { + const int itype = atomtype[i1]; + const int jtype = atomtype[i4]; + + const F_FLOAT delx = x(i1,0) - x(i4,0); + const F_FLOAT dely = x(i1,1) - x(i4,1); + const F_FLOAT delz = x(i1,2) - x(i4,2); + const F_FLOAT rsq = delx*delx + dely*dely + delz*delz; + const F_FLOAT r2inv = 1.0/rsq; + const F_FLOAT r6inv = r2inv*r2inv*r2inv; + + F_FLOAT forcecoul; + if (implicit) forcecoul = qqrd2e * q[i1]*q[i4]*r2inv; + else forcecoul = qqrd2e * q[i1]*q[i4]*sqrt(r2inv); + const F_FLOAT forcelj = r6inv * (d_lj14_1(itype,jtype)*r6inv - d_lj14_2(itype,jtype)); + const F_FLOAT fpair = d_weight[type] * (forcelj+forcecoul)*r2inv; + + const F_FLOAT r = sqrt(rsq); + F_FLOAT ecoul = 0.0; + F_FLOAT evdwl = 0.0; + F_FLOAT evdwl14_12, evdwl14_6; + if (eflag) { + if (dihedflag) + ecoul = d_weight[type] * 
forcecoul; + else + ecoul = d_weight[type] * qqrd2e * q[i1] * q[i4] * + (sqrt(r2inv) + r * cut_coulinv14 * cut_coulinv14 - 2.0 * cut_coulinv14); + evdwl14_12 = r6inv * d_lj14_3(itype,jtype) * r6inv - + d_lj14_3(itype,jtype) * cut_lj_inner6inv * cut_lj6inv; + evdwl14_6 = + -d_lj14_4(itype,jtype) * r6inv + d_lj14_4(itype,jtype) * cut_lj_inner3inv * cut_lj3inv; + evdwl = evdwl14_12 + evdwl14_6; + evdwl *= d_weight[type]; + } + + if (newton_bond || i1 < nlocal) { + a_f(i1,0) += delx*fpair; + a_f(i1,1) += dely*fpair; + a_f(i1,2) += delz*fpair; + } + if (newton_bond || i4 < nlocal) { + a_f(i4,0) -= delx*fpair; + a_f(i4,1) -= dely*fpair; + a_f(i4,2) -= delz*fpair; + } + + if (EVFLAG) ev_tally(evm,i1,i4,evdwl,ecoul,fpair,delx,dely,delz); + } +} + +template +template +KOKKOS_INLINE_FUNCTION +void DihedralCharmmfswKokkos::operator()(TagDihedralCharmmfswCompute, const int &n) const { + EVM_FLOAT evm; + this->template operator()(TagDihedralCharmmfswCompute(), n, evm); +} + +/* ---------------------------------------------------------------------- */ + +template +void DihedralCharmmfswKokkos::allocate() +{ + DihedralCharmmfsw::allocate(); +} + +/* ---------------------------------------------------------------------- + set coeffs for one or more types +------------------------------------------------------------------------- */ + +template +void DihedralCharmmfswKokkos::coeff(int narg, char **arg) +{ + DihedralCharmmfsw::coeff(narg, arg); + + int nd = atom->ndihedraltypes; + typename AT::tdual_ffloat_1d k_k("DihedralCharmmfsw::k",nd+1); + typename AT::tdual_ffloat_1d k_multiplicity("DihedralCharmmfsw::multiplicity",nd+1); + typename AT::tdual_ffloat_1d k_shift("DihedralCharmmfsw::shift",nd+1); + typename AT::tdual_ffloat_1d k_cos_shift("DihedralCharmmfsw::cos_shift",nd+1); + typename AT::tdual_ffloat_1d k_sin_shift("DihedralCharmmfsw::sin_shift",nd+1); + typename AT::tdual_ffloat_1d k_weight("DihedralCharmmfsw::weight",nd+1); + + d_k = k_k.template view(); + d_multiplicity = 
k_multiplicity.template view(); + d_shift = k_shift.template view(); + d_cos_shift = k_cos_shift.template view(); + d_sin_shift = k_sin_shift.template view(); + d_weight = k_weight.template view(); + + int n = atom->ndihedraltypes; + for (int i = 1; i <= n; i++) { + k_k.h_view[i] = k[i]; + k_multiplicity.h_view[i] = multiplicity[i]; + k_shift.h_view[i] = shift[i]; + k_cos_shift.h_view[i] = cos_shift[i]; + k_sin_shift.h_view[i] = sin_shift[i]; + k_weight.h_view[i] = weight[i]; + } + + k_k.template modify(); + k_multiplicity.template modify(); + k_shift.template modify(); + k_cos_shift.template modify(); + k_sin_shift.template modify(); + k_weight.template modify(); + + k_k.template sync(); + k_multiplicity.template sync(); + k_shift.template sync(); + k_cos_shift.template sync(); + k_sin_shift.template sync(); + k_weight.template sync(); +} + +/* ---------------------------------------------------------------------- + error check and initialize all values needed for force computation +------------------------------------------------------------------------- */ + +template +void DihedralCharmmfswKokkos::init_style() +{ + DihedralCharmmfsw::init_style(); + + int n = atom->ntypes; + DAT::tdual_ffloat_2d k_lj14_1("DihedralCharmmfsw:lj14_1",n+1,n+1); + DAT::tdual_ffloat_2d k_lj14_2("DihedralCharmmfsw:lj14_2",n+1,n+1); + DAT::tdual_ffloat_2d k_lj14_3("DihedralCharmmfsw:lj14_3",n+1,n+1); + DAT::tdual_ffloat_2d k_lj14_4("DihedralCharmmfsw:lj14_4",n+1,n+1); + + d_lj14_1 = k_lj14_1.template view(); + d_lj14_2 = k_lj14_2.template view(); + d_lj14_3 = k_lj14_3.template view(); + d_lj14_4 = k_lj14_4.template view(); + + + if (weightflag) { + int n = atom->ntypes; + for (int i = 1; i <= n; i++) { + for (int j = 1; j <= n; j++) { + k_lj14_1.h_view(i,j) = lj14_1[i][j]; + k_lj14_2.h_view(i,j) = lj14_2[i][j]; + k_lj14_3.h_view(i,j) = lj14_3[i][j]; + k_lj14_4.h_view(i,j) = lj14_4[i][j]; + } + } + } + + k_lj14_1.template modify(); + k_lj14_2.template modify(); + k_lj14_3.template 
modify(); + k_lj14_4.template modify(); + + k_lj14_1.template sync(); + k_lj14_2.template sync(); + k_lj14_3.template sync(); + k_lj14_4.template sync(); +} + +/* ---------------------------------------------------------------------- + proc 0 reads coeffs from restart file, bcasts them +------------------------------------------------------------------------- */ + +template +void DihedralCharmmfswKokkos::read_restart(FILE *fp) +{ + DihedralCharmmfsw::read_restart(fp); + + int nd = atom->ndihedraltypes; + typename AT::tdual_ffloat_1d k_k("DihedralCharmmfsw::k",nd+1); + typename AT::tdual_ffloat_1d k_multiplicity("DihedralCharmmfsw::multiplicity",nd+1); + typename AT::tdual_ffloat_1d k_shift("DihedralCharmmfsw::shift",nd+1); + typename AT::tdual_ffloat_1d k_cos_shift("DihedralCharmmfsw::cos_shift",nd+1); + typename AT::tdual_ffloat_1d k_sin_shift("DihedralCharmmfsw::sin_shift",nd+1); + typename AT::tdual_ffloat_1d k_weight("DihedralCharmmfsw::weight",nd+1); + + d_k = k_k.template view(); + d_multiplicity = k_multiplicity.template view(); + d_shift = k_shift.template view(); + d_cos_shift = k_cos_shift.template view(); + d_sin_shift = k_sin_shift.template view(); + d_weight = k_weight.template view(); + + int n = atom->ndihedraltypes; + for (int i = 1; i <= n; i++) { + k_k.h_view[i] = k[i]; + k_multiplicity.h_view[i] = multiplicity[i]; + k_shift.h_view[i] = shift[i]; + k_cos_shift.h_view[i] = cos_shift[i]; + k_sin_shift.h_view[i] = sin_shift[i]; + k_weight.h_view[i] = weight[i]; + } + + k_k.template modify(); + k_multiplicity.template modify(); + k_shift.template modify(); + k_cos_shift.template modify(); + k_sin_shift.template modify(); + k_weight.template modify(); + + k_k.template sync(); + k_multiplicity.template sync(); + k_shift.template sync(); + k_cos_shift.template sync(); + k_sin_shift.template sync(); + k_weight.template sync(); +} + +/* ---------------------------------------------------------------------- + tally energy and virial into global and per-atom 
accumulators + virial = r1F1 + r2F2 + r3F3 + r4F4 = (r1-r2) F1 + (r3-r2) F3 + (r4-r2) F4 + = (r1-r2) F1 + (r3-r2) F3 + (r4-r3 + r3-r2) F4 + = vb1*f1 + vb2*f3 + (vb3+vb2)*f4 +------------------------------------------------------------------------- */ + +template +//template +KOKKOS_INLINE_FUNCTION +void DihedralCharmmfswKokkos::ev_tally(EVM_FLOAT &evm, const int i1, const int i2, const int i3, const int i4, + F_FLOAT &edihedral, F_FLOAT *f1, F_FLOAT *f3, F_FLOAT *f4, + const F_FLOAT &vb1x, const F_FLOAT &vb1y, const F_FLOAT &vb1z, + const F_FLOAT &vb2x, const F_FLOAT &vb2y, const F_FLOAT &vb2z, + const F_FLOAT &vb3x, const F_FLOAT &vb3y, const F_FLOAT &vb3z) const +{ + E_FLOAT edihedralquarter; + F_FLOAT v[6]; + + if (eflag_either) { + if (eflag_global) { + if (newton_bond) evm.emol += edihedral; + else { + edihedralquarter = 0.25*edihedral; + if (i1 < nlocal) evm.emol += edihedralquarter; + if (i2 < nlocal) evm.emol += edihedralquarter; + if (i3 < nlocal) evm.emol += edihedralquarter; + if (i4 < nlocal) evm.emol += edihedralquarter; + } + } + if (eflag_atom) { + edihedralquarter = 0.25*edihedral; + if (newton_bond || i1 < nlocal) d_eatom[i1] += edihedralquarter; + if (newton_bond || i2 < nlocal) d_eatom[i2] += edihedralquarter; + if (newton_bond || i3 < nlocal) d_eatom[i3] += edihedralquarter; + if (newton_bond || i4 < nlocal) d_eatom[i4] += edihedralquarter; + } + } + + if (vflag_either) { + v[0] = vb1x*f1[0] + vb2x*f3[0] + (vb3x+vb2x)*f4[0]; + v[1] = vb1y*f1[1] + vb2y*f3[1] + (vb3y+vb2y)*f4[1]; + v[2] = vb1z*f1[2] + vb2z*f3[2] + (vb3z+vb2z)*f4[2]; + v[3] = vb1x*f1[1] + vb2x*f3[1] + (vb3x+vb2x)*f4[1]; + v[4] = vb1x*f1[2] + vb2x*f3[2] + (vb3x+vb2x)*f4[2]; + v[5] = vb1y*f1[2] + vb2y*f3[2] + (vb3y+vb2y)*f4[2]; + + if (vflag_global) { + if (newton_bond) { + evm.v[0] += v[0]; + evm.v[1] += v[1]; + evm.v[2] += v[2]; + evm.v[3] += v[3]; + evm.v[4] += v[4]; + evm.v[5] += v[5]; + } else { + if (i1 < nlocal) { + evm.v[0] += 0.25*v[0]; + evm.v[1] += 0.25*v[1]; + evm.v[2] += 
0.25*v[2]; + evm.v[3] += 0.25*v[3]; + evm.v[4] += 0.25*v[4]; + evm.v[5] += 0.25*v[5]; + } + if (i2 < nlocal) { + evm.v[0] += 0.25*v[0]; + evm.v[1] += 0.25*v[1]; + evm.v[2] += 0.25*v[2]; + evm.v[3] += 0.25*v[3]; + evm.v[4] += 0.25*v[4]; + evm.v[5] += 0.25*v[5]; + } + if (i3 < nlocal) { + evm.v[0] += 0.25*v[0]; + evm.v[1] += 0.25*v[1]; + evm.v[2] += 0.25*v[2]; + evm.v[3] += 0.25*v[3]; + evm.v[4] += 0.25*v[4]; + evm.v[5] += 0.25*v[5]; + } + if (i4 < nlocal) { + evm.v[0] += 0.25*v[0]; + evm.v[1] += 0.25*v[1]; + evm.v[2] += 0.25*v[2]; + evm.v[3] += 0.25*v[3]; + evm.v[4] += 0.25*v[4]; + evm.v[5] += 0.25*v[5]; + } + } + } + + if (vflag_atom) { + if (newton_bond || i1 < nlocal) { + d_vatom(i1,0) += 0.25*v[0]; + d_vatom(i1,1) += 0.25*v[1]; + d_vatom(i1,2) += 0.25*v[2]; + d_vatom(i1,3) += 0.25*v[3]; + d_vatom(i1,4) += 0.25*v[4]; + d_vatom(i1,5) += 0.25*v[5]; + } + if (newton_bond || i2 < nlocal) { + d_vatom(i2,0) += 0.25*v[0]; + d_vatom(i2,1) += 0.25*v[1]; + d_vatom(i2,2) += 0.25*v[2]; + d_vatom(i2,3) += 0.25*v[3]; + d_vatom(i2,4) += 0.25*v[4]; + d_vatom(i2,5) += 0.25*v[5]; + } + if (newton_bond || i3 < nlocal) { + d_vatom(i3,0) += 0.25*v[0]; + d_vatom(i3,1) += 0.25*v[1]; + d_vatom(i3,2) += 0.25*v[2]; + d_vatom(i3,3) += 0.25*v[3]; + d_vatom(i3,4) += 0.25*v[4]; + d_vatom(i3,5) += 0.25*v[5]; + } + if (newton_bond || i4 < nlocal) { + d_vatom(i4,0) += 0.25*v[0]; + d_vatom(i4,1) += 0.25*v[1]; + d_vatom(i4,2) += 0.25*v[2]; + d_vatom(i4,3) += 0.25*v[3]; + d_vatom(i4,4) += 0.25*v[4]; + d_vatom(i4,5) += 0.25*v[5]; + } + } + } +} + +/* ---------------------------------------------------------------------- + tally eng_vdwl and virial into global and per-atom accumulators + need i < nlocal test since called by bond_quartic and dihedral_charmm +------------------------------------------------------------------------- */ + +template +KOKKOS_INLINE_FUNCTION +void DihedralCharmmfswKokkos::ev_tally(EVM_FLOAT &evm, const int i, const int j, + const F_FLOAT &evdwl, const F_FLOAT &ecoul, const 
F_FLOAT &fpair, const F_FLOAT &delx, + const F_FLOAT &dely, const F_FLOAT &delz) const +{ + E_FLOAT evdwlhalf,ecoulhalf,epairhalf; + F_FLOAT v[6]; + + + if (eflag_either) { + if (eflag_global) { + if (newton_bond) { + evm.evdwl += evdwl; + evm.ecoul += ecoul; + } else { + evdwlhalf = 0.5*evdwl; + ecoulhalf = 0.5*ecoul; + if (i < nlocal) { + evm.evdwl += evdwlhalf; + evm.ecoul += ecoulhalf; + } + if (j < nlocal) { + evm.evdwl += evdwlhalf; + evm.ecoul += ecoulhalf; + } + } + } + if (eflag_atom) { + epairhalf = 0.5 * (evdwl + ecoul); + if (newton_bond || i < nlocal) d_eatom_pair[i] += epairhalf; + if (newton_bond || j < nlocal) d_eatom_pair[j] += epairhalf; + } + } + + if (vflag_either) { + v[0] = delx*delx*fpair; + v[1] = dely*dely*fpair; + v[2] = delz*delz*fpair; + v[3] = delx*dely*fpair; + v[4] = delx*delz*fpair; + v[5] = dely*delz*fpair; + + if (vflag_global) { + if (newton_bond) { + evm.vp[0] += v[0]; + evm.vp[1] += v[1]; + evm.vp[2] += v[2]; + evm.vp[3] += v[3]; + evm.vp[4] += v[4]; + evm.vp[5] += v[5]; + } else { + if (i < nlocal) { + evm.vp[0] += 0.5*v[0]; + evm.vp[1] += 0.5*v[1]; + evm.vp[2] += 0.5*v[2]; + evm.vp[3] += 0.5*v[3]; + evm.vp[4] += 0.5*v[4]; + evm.vp[5] += 0.5*v[5]; + } + if (j < nlocal) { + evm.vp[0] += 0.5*v[0]; + evm.vp[1] += 0.5*v[1]; + evm.vp[2] += 0.5*v[2]; + evm.vp[3] += 0.5*v[3]; + evm.vp[4] += 0.5*v[4]; + evm.vp[5] += 0.5*v[5]; + } + } + } + + if (vflag_atom) { + if (newton_bond || i < nlocal) { + d_vatom_pair(i,0) += 0.5*v[0]; + d_vatom_pair(i,1) += 0.5*v[1]; + d_vatom_pair(i,2) += 0.5*v[2]; + d_vatom_pair(i,3) += 0.5*v[3]; + d_vatom_pair(i,4) += 0.5*v[4]; + d_vatom_pair(i,5) += 0.5*v[5]; + } + if (newton_bond || j < nlocal) { + d_vatom_pair(j,0) += 0.5*v[0]; + d_vatom_pair(j,1) += 0.5*v[1]; + d_vatom_pair(j,2) += 0.5*v[2]; + d_vatom_pair(j,3) += 0.5*v[3]; + d_vatom_pair(j,4) += 0.5*v[4]; + d_vatom_pair(j,5) += 0.5*v[5]; + } + } + } +} + +/* ---------------------------------------------------------------------- */ + +namespace LAMMPS_NS 
{ +template class DihedralCharmmfswKokkos; +#ifdef LMP_KOKKOS_GPU +template class DihedralCharmmfswKokkos; +#endif +} + diff --git a/src/KOKKOS/dihedral_charmmfsw_kokkos.h b/src/KOKKOS/dihedral_charmmfsw_kokkos.h new file mode 100644 index 0000000000..b1c65ae477 --- /dev/null +++ b/src/KOKKOS/dihedral_charmmfsw_kokkos.h @@ -0,0 +1,118 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + https://www.lammps.org/, Sandia National Laboratories + LAMMPS development team: developers@lammps.org + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +#ifdef DIHEDRAL_CLASS +// clang-format off +DihedralStyle(charmmfsw/kk,DihedralCharmmfswKokkos); +DihedralStyle(charmmfsw/kk/device,DihedralCharmmfswKokkos); +DihedralStyle(charmmfsw/kk/host,DihedralCharmmfswKokkos); +// clang-format on +#else + +// clang-format off +#ifndef LMP_DIHEDRAL_CHARMMFSW_KOKKOS_H +#define LMP_DIHEDRAL_CHARMMFSW_KOKKOS_H + +#include "dihedral_charmmfsw.h" +#include "kokkos_type.h" +#include "dihedral_charmm_kokkos.h" // needed for s_EVM_FLOAT + +namespace LAMMPS_NS { + +template +struct TagDihedralCharmmfswCompute{}; + +template +class DihedralCharmmfswKokkos : public DihedralCharmmfsw { + public: + typedef DeviceType device_type; + typedef EVM_FLOAT value_type; + typedef ArrayTypes AT; + + DihedralCharmmfswKokkos(class LAMMPS *); + ~DihedralCharmmfswKokkos() override; + void compute(int, int) override; + void coeff(int, char **) override; + void init_style() override; + void read_restart(FILE *) override; + + template + KOKKOS_INLINE_FUNCTION + void 
operator()(TagDihedralCharmmfswCompute, const int&, EVM_FLOAT&) const; + + template + KOKKOS_INLINE_FUNCTION + void operator()(TagDihedralCharmmfswCompute, const int&) const; + + //template + KOKKOS_INLINE_FUNCTION + void ev_tally(EVM_FLOAT &evm, const int i1, const int i2, const int i3, const int i4, + F_FLOAT &edihedral, F_FLOAT *f1, F_FLOAT *f3, F_FLOAT *f4, + const F_FLOAT &vb1x, const F_FLOAT &vb1y, const F_FLOAT &vb1z, + const F_FLOAT &vb2x, const F_FLOAT &vb2y, const F_FLOAT &vb2z, + const F_FLOAT &vb3x, const F_FLOAT &vb3y, const F_FLOAT &vb3z) const; + + KOKKOS_INLINE_FUNCTION + void ev_tally(EVM_FLOAT &evm, const int i, const int j, + const F_FLOAT &evdwl, const F_FLOAT &ecoul, const F_FLOAT &fpair, const F_FLOAT &delx, + const F_FLOAT &dely, const F_FLOAT &delz) const; + + protected: + + class NeighborKokkos *neighborKK; + + typename AT::t_x_array_randomread x; + typename AT::t_int_1d_randomread atomtype; + typename AT::t_ffloat_1d_randomread q; + typename AT::t_f_array f; + typename AT::t_int_2d dihedrallist; + + typedef typename KKDevice::value KKDeviceType; + Kokkos::DualView k_eatom; + Kokkos::DualView k_vatom; + Kokkos::View > d_eatom; + Kokkos::View > d_vatom; + + Kokkos::DualView k_eatom_pair; + Kokkos::DualView k_vatom_pair; + Kokkos::View > d_eatom_pair; + Kokkos::View > d_vatom_pair; + + int nlocal,newton_bond; + int eflag,vflag; + double qqrd2e; + + Kokkos::DualView k_warning_flag; + typename Kokkos::DualView::t_dev d_warning_flag; + typename Kokkos::DualView::t_host h_warning_flag; + + typename AT::t_ffloat_2d d_lj14_1; + typename AT::t_ffloat_2d d_lj14_2; + typename AT::t_ffloat_2d d_lj14_3; + typename AT::t_ffloat_2d d_lj14_4; + + typename AT::t_ffloat_1d d_k; + typename AT::t_ffloat_1d d_multiplicity; + typename AT::t_ffloat_1d d_shift; + typename AT::t_ffloat_1d d_sin_shift; + typename AT::t_ffloat_1d d_cos_shift; + typename AT::t_ffloat_1d d_weight; + + void allocate() override; +}; + +} + +#endif +#endif + diff --git 
a/src/KOKKOS/dynamical_matrix_kokkos.cpp b/src/KOKKOS/dynamical_matrix_kokkos.cpp index 32986025e6..ec2cc17ef2 100644 --- a/src/KOKKOS/dynamical_matrix_kokkos.cpp +++ b/src/KOKKOS/dynamical_matrix_kokkos.cpp @@ -174,72 +174,45 @@ void DynamicalMatrixKokkos::update_force() } bool execute_on_host = false; - unsigned int datamask_read_device = 0; - unsigned int datamask_modify_device = 0; unsigned int datamask_read_host = 0; if (pair_compute_flag) { if (force->pair->execution_space==Host) { execute_on_host = true; datamask_read_host |= force->pair->datamask_read; - datamask_modify_device |= force->pair->datamask_modify; - } else { - datamask_read_device |= force->pair->datamask_read; - datamask_modify_device |= force->pair->datamask_modify; } } if (atomKK->molecular && force->bond) { if (force->bond->execution_space==Host) { execute_on_host = true; datamask_read_host |= force->bond->datamask_read; - datamask_modify_device |= force->bond->datamask_modify; - } else { - datamask_read_device |= force->bond->datamask_read; - datamask_modify_device |= force->bond->datamask_modify; } } if (atomKK->molecular && force->angle) { if (force->angle->execution_space==Host) { execute_on_host = true; datamask_read_host |= force->angle->datamask_read; - datamask_modify_device |= force->angle->datamask_modify; - } else { - datamask_read_device |= force->angle->datamask_read; - datamask_modify_device |= force->angle->datamask_modify; } } if (atomKK->molecular && force->dihedral) { if (force->dihedral->execution_space==Host) { execute_on_host = true; datamask_read_host |= force->dihedral->datamask_read; - datamask_modify_device |= force->dihedral->datamask_modify; - } else { - datamask_read_device |= force->dihedral->datamask_read; - datamask_modify_device |= force->dihedral->datamask_modify; } } if (atomKK->molecular && force->improper) { if (force->improper->execution_space==Host) { execute_on_host = true; datamask_read_host |= force->improper->datamask_read; - datamask_modify_device 
|= force->improper->datamask_modify; - } else { - datamask_read_device |= force->improper->datamask_read; - datamask_modify_device |= force->improper->datamask_modify; } } if (kspace_compute_flag) { if (force->kspace->execution_space==Host) { execute_on_host = true; datamask_read_host |= force->kspace->datamask_read; - datamask_modify_device |= force->kspace->datamask_modify; - } else { - datamask_read_device |= force->kspace->datamask_read; - datamask_modify_device |= force->kspace->datamask_modify; } } - if (pair_compute_flag) { atomKK->sync(force->pair->execution_space,force->pair->datamask_read); atomKK->sync(force->pair->execution_space,~(~force->pair->datamask_read|(F_MASK | ENERGY_MASK | VIRIAL_MASK))); diff --git a/src/KOKKOS/fix_acks2_reaxff_kokkos.cpp b/src/KOKKOS/fix_acks2_reaxff_kokkos.cpp index 59ed918729..9c34908d08 100644 --- a/src/KOKKOS/fix_acks2_reaxff_kokkos.cpp +++ b/src/KOKKOS/fix_acks2_reaxff_kokkos.cpp @@ -192,7 +192,7 @@ void FixACKS2ReaxFFKokkos::setup_pre_force(int vflag) /* ---------------------------------------------------------------------- */ template -void FixACKS2ReaxFFKokkos::pre_force(int vflag) +void FixACKS2ReaxFFKokkos::pre_force(int /*vflag*/) { if (update->ntimestep % nevery) return; @@ -298,8 +298,8 @@ void FixACKS2ReaxFFKokkos::pre_force(int vflag) } else { // GPU, use teams Kokkos::deep_copy(d_mfill_offset,0); - int vector_length = 32; int atoms_per_team = 4; + int vector_length = 32; int num_teams = nn / atoms_per_team + (nn % atoms_per_team ? 
1 : 0); Kokkos::TeamPolicy policy(num_teams, atoms_per_team, diff --git a/src/KOKKOS/fix_acks2_reaxff_kokkos.h b/src/KOKKOS/fix_acks2_reaxff_kokkos.h index 127c8d0402..c27719c364 100644 --- a/src/KOKKOS/fix_acks2_reaxff_kokkos.h +++ b/src/KOKKOS/fix_acks2_reaxff_kokkos.h @@ -289,8 +289,7 @@ struct FixACKS2ReaxFFKokkosComputeHFunctor { FixACKS2ReaxFFKokkosComputeHFunctor(FixACKS2ReaxFFKokkos *c_ptr, int _atoms_per_team, int _vector_length) - : c(*c_ptr), atoms_per_team(_atoms_per_team), - vector_length(_vector_length) { + : atoms_per_team(_atoms_per_team), vector_length(_vector_length), c(*c_ptr) { c.cleanup_copy(); }; @@ -337,8 +336,7 @@ struct FixACKS2ReaxFFKokkosComputeXFunctor { FixACKS2ReaxFFKokkosComputeXFunctor(FixACKS2ReaxFFKokkos *c_ptr, int _atoms_per_team, int _vector_length) - : c(*c_ptr), atoms_per_team(_atoms_per_team), - vector_length(_vector_length) { + : atoms_per_team(_atoms_per_team), vector_length(_vector_length), c(*c_ptr) { c.cleanup_copy(); }; diff --git a/src/KOKKOS/fix_neigh_history_kokkos.cpp b/src/KOKKOS/fix_neigh_history_kokkos.cpp index b4a852ba70..49fe3f1177 100644 --- a/src/KOKKOS/fix_neigh_history_kokkos.cpp +++ b/src/KOKKOS/fix_neigh_history_kokkos.cpp @@ -453,8 +453,12 @@ KOKKOS_INLINE_FUNCTION void FixNeighHistoryKokkos::operator()(TagFixNeighHistoryUnpackExchange, const int &i) const { int index = d_indices(i); + if (index > -1) { int m = (int) d_ubuf(d_buf(i)).i; + if (i >= nrecv1) + m = nextrarecv1 + (int) d_ubuf(d_buf(nextrarecv1 + i - nrecv1)).i; + int n = (int) d_ubuf(d_buf(m++)).i; d_npartner(index) = n; for (int p = 0; p < n; p++) { @@ -471,6 +475,7 @@ void FixNeighHistoryKokkos::operator()(TagFixNeighHistoryUnpackExcha template void FixNeighHistoryKokkos::unpack_exchange_kokkos( DAT::tdual_xfloat_2d &k_buf, DAT::tdual_int_1d &k_indices, int nrecv, + int nrecv1, int nextrarecv1, ExecutionSpace /*space*/) { d_buf = typename AT::t_xfloat_1d_um( @@ -478,6 +483,9 @@ void FixNeighHistoryKokkos::unpack_exchange_kokkos( 
k_buf.extent(0)*k_buf.extent(1)); d_indices = k_indices.view(); + this->nrecv1 = nrecv1; + this->nextrarecv1 = nextrarecv1; + d_npartner = k_npartner.template view(); d_partner = k_partner.template view(); d_valuepartner = k_valuepartner.template view(); diff --git a/src/KOKKOS/fix_neigh_history_kokkos.h b/src/KOKKOS/fix_neigh_history_kokkos.h index 9c07a953c4..dd1ad769b8 100644 --- a/src/KOKKOS/fix_neigh_history_kokkos.h +++ b/src/KOKKOS/fix_neigh_history_kokkos.h @@ -72,12 +72,14 @@ class FixNeighHistoryKokkos : public FixNeighHistory, public KokkosBase { void unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, DAT::tdual_int_1d &indices,int nrecv, + int nrecv1,int nrecv1extra, ExecutionSpace space) override; typename DAT::tdual_int_2d k_firstflag; typename DAT::tdual_float_2d k_firstvalue; private: + int nrecv1,nextrarecv1; int nlocal,nsend,beyond_contact; typename AT::t_tagint_1d tag; diff --git a/src/KOKKOS/fix_nvt_sllod_kokkos.cpp b/src/KOKKOS/fix_nvt_sllod_kokkos.cpp index bd65a6965e..948e3b88f6 100644 --- a/src/KOKKOS/fix_nvt_sllod_kokkos.cpp +++ b/src/KOKKOS/fix_nvt_sllod_kokkos.cpp @@ -128,7 +128,7 @@ void FixNVTSllodKokkos::nh_v_temp() d_h_two = Few(h_two); - if (vdelu.extent(0) < atomKK->nmax) + if ((int)vdelu.extent(0) < atomKK->nmax) vdelu = typename AT::t_v_array(Kokkos::NoInit("nvt/sllod/kk:vdelu"), atomKK->nmax); if (!this->psllod_flag) { diff --git a/src/KOKKOS/fix_qeq_reaxff_kokkos.cpp b/src/KOKKOS/fix_qeq_reaxff_kokkos.cpp index a2a50d84bb..18d7af75a7 100644 --- a/src/KOKKOS/fix_qeq_reaxff_kokkos.cpp +++ b/src/KOKKOS/fix_qeq_reaxff_kokkos.cpp @@ -1416,6 +1416,7 @@ KOKKOS_INLINE_FUNCTION void FixQEqReaxFFKokkos::operator()(TagQEqUnpackExchange, const int &i) const { int index = d_indices(i); + if (index > -1) { for (int m = 0; m < nprev; m++) d_s_hist(index,m) = d_buf(i*nprev*2 + m); for (int m = 0; m < nprev; m++) d_t_hist(index,m) = d_buf(i*nprev*2 + nprev+m); @@ -1427,6 +1428,7 @@ void FixQEqReaxFFKokkos::operator()(TagQEqUnpackExchange, const 
int template void FixQEqReaxFFKokkos::unpack_exchange_kokkos( DAT::tdual_xfloat_2d &k_buf, DAT::tdual_int_1d &k_indices, int nrecv, + int /*nrecv1*/, int /*nextrarecv1*/, ExecutionSpace /*space*/) { k_buf.sync(); diff --git a/src/KOKKOS/fix_qeq_reaxff_kokkos.h b/src/KOKKOS/fix_qeq_reaxff_kokkos.h index 9bc38b0492..6aa345fba6 100644 --- a/src/KOKKOS/fix_qeq_reaxff_kokkos.h +++ b/src/KOKKOS/fix_qeq_reaxff_kokkos.h @@ -143,6 +143,7 @@ class FixQEqReaxFFKokkos : public FixQEqReaxFF, public KokkosBase { void unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, DAT::tdual_int_1d &indices,int nrecv, + int nrecv1,int nextrarecv1, ExecutionSpace space) override; struct params_qeq{ diff --git a/src/KOKKOS/fix_shake_kokkos.cpp b/src/KOKKOS/fix_shake_kokkos.cpp index dd6de8f9ec..39f4f4d4fe 100644 --- a/src/KOKKOS/fix_shake_kokkos.cpp +++ b/src/KOKKOS/fix_shake_kokkos.cpp @@ -525,7 +525,7 @@ void FixShakeKokkos::operator()(TagFixShakePostForce -int FixShakeKokkos::dof(int igroup) +bigint FixShakeKokkos::dof(int igroup) { d_mask = atomKK->k_mask.view(); d_tag = atomKK->k_tag.view(); @@ -538,7 +538,7 @@ int FixShakeKokkos::dof(int igroup) // count dof in a cluster if and only if // the central atom is in group and atom i is the central atom - int n = 0; + bigint n = 0; { // local variables for lambda capture @@ -549,7 +549,7 @@ int FixShakeKokkos::dof(int igroup) auto groupbit = group->bitmask[igroup]; Kokkos::parallel_reduce(Kokkos::RangePolicy(0,nlocal), - LAMMPS_LAMBDA(const int& i, int& n) { + LAMMPS_LAMBDA(const int& i, bigint& n) { if (!(mask[i] & groupbit)) return; if (d_shake_flag[i] == 0) return; if (d_shake_atom(i,0) != tag[i]) return; @@ -560,8 +560,8 @@ int FixShakeKokkos::dof(int igroup) },n); } - int nall; - MPI_Allreduce(&n,&nall,1,MPI_INT,MPI_SUM,world); + bigint nall; + MPI_Allreduce(&n,&nall,1,MPI_LMP_BIGINT,MPI_SUM,world); return nall; } @@ -1581,8 +1581,8 @@ void FixShakeKokkos::pack_exchange_item(const int &mysend, int &offs else offset++; } else { - 
d_buf[mysend] = nsend + offset; int m = nsend + offset; + d_buf[mysend] = m; d_buf[m++] = flag; if (flag == 1) { d_buf[m++] = d_shake_atom(i,0); @@ -1703,6 +1703,8 @@ void FixShakeKokkos::operator()(TagFixShakeUnpackExchange, const int if (index > -1) { int m = d_buf[i]; + if (i >= nrecv1) + m = nextrarecv1 + d_buf[nextrarecv1 + i - nrecv1]; int flag = d_shake_flag[index] = static_cast (d_buf[m++]); if (flag == 1) { @@ -1739,6 +1741,7 @@ void FixShakeKokkos::operator()(TagFixShakeUnpackExchange, const int template void FixShakeKokkos::unpack_exchange_kokkos( DAT::tdual_xfloat_2d &k_buf, DAT::tdual_int_1d &k_indices, int nrecv, + int nrecv1, int nextrarecv1, ExecutionSpace /*space*/) { k_buf.sync(); @@ -1749,6 +1752,9 @@ void FixShakeKokkos::unpack_exchange_kokkos( k_buf.extent(0)*k_buf.extent(1)); d_indices = k_indices.view(); + this->nrecv1 = nrecv1; + this->nextrarecv1 = nextrarecv1; + k_shake_flag.template sync(); k_shake_atom.template sync(); k_shake_type.template sync(); diff --git a/src/KOKKOS/fix_shake_kokkos.h b/src/KOKKOS/fix_shake_kokkos.h index 185e69ce86..19f3a2343d 100644 --- a/src/KOKKOS/fix_shake_kokkos.h +++ b/src/KOKKOS/fix_shake_kokkos.h @@ -44,8 +44,6 @@ struct TagFixShakeUnpackExchange{}; template class FixShakeKokkos : public FixShake, public KokkosBase { - //friend class FixEHEX; - public: typedef DeviceType device_type; typedef EV_FLOAT value_type; @@ -77,7 +75,7 @@ class FixShakeKokkos : public FixShake, public KokkosBase { void shake_end_of_step(int vflag) override; void correct_coordinates(int vflag) override; - int dof(int) override; + bigint dof(int) override; void unconstrained_update() override; @@ -112,9 +110,12 @@ class FixShakeKokkos : public FixShake, public KokkosBase { void unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, DAT::tdual_int_1d &indices,int nrecv, + int nrecv1,int nrecv1extra, ExecutionSpace space) override; protected: + int nrecv1,nextrarecv1; + typename AT::t_x_array d_x; typename AT::t_v_array d_v; typename 
AT::t_f_array d_f; @@ -259,4 +260,3 @@ struct FixShakeKokkosPackExchangeFunctor { #endif #endif - diff --git a/src/KOKKOS/fix_spring_self_kokkos.cpp b/src/KOKKOS/fix_spring_self_kokkos.cpp index efd8a652ff..6571db37ed 100644 --- a/src/KOKKOS/fix_spring_self_kokkos.cpp +++ b/src/KOKKOS/fix_spring_self_kokkos.cpp @@ -184,12 +184,12 @@ void FixSpringSelfKokkos::copy_arrays(int i, int j, int delflag) template KOKKOS_INLINE_FUNCTION -void FixSpringSelfKokkos::pack_exchange_item(const int &mysend, int &offset, const bool &final) const +void FixSpringSelfKokkos::pack_exchange_item(const int &mysend, int &offset, const bool &/*final*/) const { const int i = d_exchange_sendlist(mysend); - d_buf[mysend] = nsend + offset; int m = nsend + offset; + d_buf[mysend] = m; d_buf[m++] = d_xoriginal(i,0); d_buf[m++] = d_xoriginal(i,1); d_buf[m++] = d_xoriginal(i,2); @@ -258,6 +258,8 @@ void FixSpringSelfKokkos::operator()(TagFixSpringSelfUnpackExchange, if (index > -1) { int m = d_buf[i]; + if (i >= nrecv1) + m = nextrarecv1 + d_buf[nextrarecv1 + i - nrecv1]; d_xoriginal(index,0) = static_cast (d_buf[m++]); d_xoriginal(index,1) = static_cast (d_buf[m++]); @@ -270,6 +272,7 @@ void FixSpringSelfKokkos::operator()(TagFixSpringSelfUnpackExchange, template void FixSpringSelfKokkos::unpack_exchange_kokkos( DAT::tdual_xfloat_2d &k_buf, DAT::tdual_int_1d &k_indices, int nrecv, + int nrecv1, int nextrarecv1, ExecutionSpace /*space*/) { k_buf.sync(); @@ -280,6 +283,9 @@ void FixSpringSelfKokkos::unpack_exchange_kokkos( k_buf.extent(0)*k_buf.extent(1)); d_indices = k_indices.view(); + this->nrecv1 = nrecv1; + this->nextrarecv1 = nextrarecv1; + k_xoriginal.template sync(); copymode = 1; diff --git a/src/KOKKOS/fix_spring_self_kokkos.h b/src/KOKKOS/fix_spring_self_kokkos.h index b23e92249b..add5a80bc7 100644 --- a/src/KOKKOS/fix_spring_self_kokkos.h +++ b/src/KOKKOS/fix_spring_self_kokkos.h @@ -58,6 +58,7 @@ class FixSpringSelfKokkos : public FixSpringSelf, public KokkosBase { void 
unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, DAT::tdual_int_1d &indices,int nrecv, + int nrecv1,int nrecv1extra, ExecutionSpace space) override; @@ -65,6 +66,8 @@ class FixSpringSelfKokkos : public FixSpringSelf, public KokkosBase { int unpack_exchange(int, double *) override; protected: + int nrecv1,nextrarecv1; + DAT::tdual_x_array k_xoriginal; typename AT::t_x_array d_xoriginal; diff --git a/src/KOKKOS/fix_wall_gran_kokkos.cpp b/src/KOKKOS/fix_wall_gran_kokkos.cpp index f870b0f240..25e405c798 100644 --- a/src/KOKKOS/fix_wall_gran_kokkos.cpp +++ b/src/KOKKOS/fix_wall_gran_kokkos.cpp @@ -419,6 +419,7 @@ void FixWallGranKokkos::operator()(TagFixWallGranUnpackExchange, con template void FixWallGranKokkos::unpack_exchange_kokkos( DAT::tdual_xfloat_2d &k_buf, DAT::tdual_int_1d &k_indices, int nrecv, + int /*nrecv1*/, int /*nextrarecv1*/, ExecutionSpace /*space*/) { d_buf = typename ArrayTypes::t_xfloat_1d_um( @@ -430,7 +431,6 @@ void FixWallGranKokkos::unpack_exchange_kokkos( copymode = 1; - Kokkos::parallel_for(Kokkos::RangePolicy(0,nrecv),*this); copymode = 0; diff --git a/src/KOKKOS/fix_wall_gran_kokkos.h b/src/KOKKOS/fix_wall_gran_kokkos.h index c7d566ec72..ae54fdb085 100644 --- a/src/KOKKOS/fix_wall_gran_kokkos.h +++ b/src/KOKKOS/fix_wall_gran_kokkos.h @@ -62,12 +62,13 @@ class FixWallGranKokkos : public FixWallGranOld, public KokkosBase { void operator()(TagFixWallGranUnpackExchange, const int&) const; int pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2d &buf, - DAT::tdual_int_1d k_sendlist, - DAT::tdual_int_1d k_copylist, - ExecutionSpace space) override; + DAT::tdual_int_1d k_sendlist, + DAT::tdual_int_1d k_copylist, + ExecutionSpace space) override; void unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, DAT::tdual_int_1d &indices,int nrecv, + int nrecv1,int nrecv1extra, ExecutionSpace space) override; private: @@ -91,6 +92,7 @@ class FixWallGranKokkos : public FixWallGranOld, public KokkosBase { typename AT::t_int_1d d_copylist; typename 
AT::t_int_1d d_indices; }; + } #endif diff --git a/src/KOKKOS/grid3d_kokkos.cpp b/src/KOKKOS/grid3d_kokkos.cpp index 7b97c417dd..87f2baff84 100644 --- a/src/KOKKOS/grid3d_kokkos.cpp +++ b/src/KOKKOS/grid3d_kokkos.cpp @@ -635,7 +635,7 @@ void Grid3dKokkos::setup_comm_tiled(int &nbuf1, int &nbuf2) ------------------------------------------------------------------------- */ template -void Grid3dKokkos::forward_comm(int caller, void *ptr, int which, int nper, int nbyte, +void Grid3dKokkos::forward_comm(int caller, void *ptr, int which, int nper, int /*nbyte*/, FFT_DAT::tdual_FFT_SCALAR_1d& k_buf1, FFT_DAT::tdual_FFT_SCALAR_1d& k_buf2, MPI_Datatype datatype) { @@ -645,7 +645,7 @@ void Grid3dKokkos::forward_comm(int caller, void *ptr, int which, in else forward_comm_kspace_tiled((KSpace *) ptr,which,nper,k_buf1,k_buf2,datatype); } else - error->all(FLERR,"Kokkos grid comm only supports Kspace"); + error->all(FLERR,"Kokkos grid comm currently only supports Kspace"); } /* ---------------------------------------------------------------------- @@ -775,7 +775,7 @@ forward_comm_kspace_tiled(KSpace *kspace, int which, int nper, ------------------------------------------------------------------------- */ template -void Grid3dKokkos::reverse_comm(int caller, void *ptr, int which, int nper, int nbyte, +void Grid3dKokkos::reverse_comm(int caller, void *ptr, int which, int nper, int /*nbyte*/, FFT_DAT::tdual_FFT_SCALAR_1d& k_buf1, FFT_DAT::tdual_FFT_SCALAR_1d& k_buf2, MPI_Datatype datatype) { @@ -945,7 +945,7 @@ int Grid3dKokkos::indices(DAT::tdual_int_2d &k_list, int index, int xlo, int xhi, int ylo, int yhi, int zlo, int zhi) { int nmax = (xhi-xlo+1) * (yhi-ylo+1) * (zhi-zlo+1); - if (k_list.extent(1) < nmax) + if ((int)k_list.extent(1) < nmax) k_list.resize(k_list.extent(0),nmax); if (nmax == 0) return 0; diff --git a/src/KOKKOS/kissfft_kokkos.h b/src/KOKKOS/kissfft_kokkos.h index 265677a21c..e24768f774 100644 --- a/src/KOKKOS/kissfft_kokkos.h +++ b/src/KOKKOS/kissfft_kokkos.h @@ 
-489,7 +489,7 @@ class KissFFTKokkos { * It can be freed with free(), rather than a kiss_fft-specific function. */ - static kiss_fft_state_kokkos kiss_fft_alloc_kokkos(int nfft, int inverse_fft, void *mem, size_t *lenmem) + static kiss_fft_state_kokkos kiss_fft_alloc_kokkos(int nfft, int inverse_fft, void * /*mem*/, size_t * /*lenmem*/) { kiss_fft_state_kokkos st; int i; diff --git a/src/KOKKOS/kokkos.cpp b/src/KOKKOS/kokkos.cpp index 5572f69901..b8bcd80a00 100644 --- a/src/KOKKOS/kokkos.cpp +++ b/src/KOKKOS/kokkos.cpp @@ -622,7 +622,7 @@ void KokkosLMP::accelerator(int narg, char **arg) int KokkosLMP::neigh_count(int m) { - int inum; + int inum = 0; int nneigh = 0; ArrayTypes::t_int_1d h_ilist; diff --git a/src/KOKKOS/kokkos_base.h b/src/KOKKOS/kokkos_base.h index 1e22a38657..24fcc47579 100644 --- a/src/KOKKOS/kokkos_base.h +++ b/src/KOKKOS/kokkos_base.h @@ -47,6 +47,7 @@ class KokkosBase { ExecutionSpace /*space*/) { return 0; } virtual void unpack_exchange_kokkos(DAT::tdual_xfloat_2d & /*k_buf*/, DAT::tdual_int_1d & /*indices*/, int /*nrecv*/, + int /*nrecv1*/, int /*nextrarecv1*/, ExecutionSpace /*space*/) {} // Region diff --git a/src/KOKKOS/kokkos_type.h b/src/KOKKOS/kokkos_type.h index c8ab2198d6..1009e43196 100644 --- a/src/KOKKOS/kokkos_type.h +++ b/src/KOKKOS/kokkos_type.h @@ -453,13 +453,6 @@ struct alignas(2*sizeof(F_FLOAT)) s_FLOAT2 { v[0] = v[1] = 0.0; } - KOKKOS_INLINE_FUNCTION - s_FLOAT2(const s_FLOAT2 & rhs) { - for (int i = 0; i < 2; i++){ - v[i] = rhs.v[i]; - } - } - KOKKOS_INLINE_FUNCTION void operator+=(const s_FLOAT2 &rhs) { v[0] += rhs.v[0]; diff --git a/src/KOKKOS/min_linesearch_kokkos.cpp b/src/KOKKOS/min_linesearch_kokkos.cpp index e8a22f9ddb..2d424957c5 100644 --- a/src/KOKKOS/min_linesearch_kokkos.cpp +++ b/src/KOKKOS/min_linesearch_kokkos.cpp @@ -59,8 +59,8 @@ MinLineSearchKokkos::MinLineSearchKokkos(LAMMPS *lmp) : MinKokkos(lmp) MinLineSearchKokkos::~MinLineSearchKokkos() { - delete [] gextra; - delete [] hextra; + delete[] gextra; + 
delete[] hextra; } /* ---------------------------------------------------------------------- */ @@ -171,8 +171,8 @@ int MinLineSearchKokkos::linemin_quadratic(double eoriginal, double &alpha) { double fdothall,fdothme,hme,hmaxall; double de_ideal,de; - double delfh,engprev,relerr,alphaprev,fhprev,ff,fh,alpha0; - double dot[2],dotall[2]; + double delfh,engprev,relerr,alphaprev,fhprev,fh,alpha0; + double dot,dotall; double alphamax; fix_minimize_kk->k_vectors.sync(); @@ -280,22 +280,16 @@ int MinLineSearchKokkos::linemin_quadratic(double eoriginal, double &alpha) sdot.d1 += l_fvec[i]*l_h[i]; },sdot); } - dot[0] = sdot.d0; - dot[1] = sdot.d1; + dot = sdot.d1; - MPI_Allreduce(dot,dotall,2,MPI_DOUBLE,MPI_SUM,world); + MPI_Allreduce(&dot,&dotall,1,MPI_DOUBLE,MPI_SUM,world); if (nextra_global) { for (int i = 0; i < nextra_global; i++) { - dotall[0] += fextra[i]*fextra[i]; - dotall[1] += fextra[i]*hextra[i]; + dotall += fextra[i]*hextra[i]; } } - ff = dotall[0]; - fh = dotall[1]; - if (output->thermo->normflag) { - ff /= atom->natoms; - fh /= atom->natoms; - } + fh = dotall; + if (output->thermo->normflag) fh /= atom->natoms; delfh = fh - fhprev; diff --git a/src/KOKKOS/npair_kokkos.h b/src/KOKKOS/npair_kokkos.h index fe5484a771..8dd7a1c5ef 100644 --- a/src/KOKKOS/npair_kokkos.h +++ b/src/KOKKOS/npair_kokkos.h @@ -303,7 +303,7 @@ class NeighborKokkosExecute const typename ArrayTypes::t_int_scalar _h_resize, const typename AT::t_int_scalar _new_maxneighs, const typename ArrayTypes::t_int_scalar _h_new_maxneighs): - neigh_list(_neigh_list), cutneighsq(_cutneighsq),delta(_delta),exclude(_exclude), + neigh_list(_neigh_list),delta(_delta),cutneighsq(_cutneighsq),exclude(_exclude), nex_type(_nex_type),ex1_type(_ex1_type),ex2_type(_ex2_type), ex_type(_ex_type),nex_group(_nex_group), ex1_bit(_ex1_bit),ex2_bit(_ex2_bit), @@ -319,10 +319,11 @@ class NeighborKokkosExecute mbinxlo(_mbinxlo),mbinylo(_mbinylo),mbinzlo(_mbinzlo), bininvx(_bininvx),bininvy(_bininvy),bininvz(_bininvz), 
nlocal(_nlocal),nall(_nall),neigh_transpose(_neigh_transpose), + resize(_resize),new_maxneighs(_new_maxneighs), + h_resize(_h_resize),h_new_maxneighs(_h_new_maxneighs), xperiodic(_xperiodic),yperiodic(_yperiodic),zperiodic(_zperiodic), xprd_half(_xprd_half),yprd_half(_yprd_half),zprd_half(_zprd_half), - skin(_skin),resize(_resize),h_resize(_h_resize), - new_maxneighs(_new_maxneighs),h_new_maxneighs(_h_new_maxneighs) { + skin(_skin) { if (molecular == 2) moltemplate = 1; else moltemplate = 0; diff --git a/src/KOKKOS/pair_kokkos.h b/src/KOKKOS/pair_kokkos.h index 9521268284..87324b49b9 100644 --- a/src/KOKKOS/pair_kokkos.h +++ b/src/KOKKOS/pair_kokkos.h @@ -627,7 +627,7 @@ struct PairComputeFunctor { const int itype = c.type(i); const F_FLOAT qtmp = c.q(i); - if (ZEROFLAG) { + if (NEIGHFLAG == FULL && ZEROFLAG) { Kokkos::single(Kokkos::PerThread(team), [&] (){ f(i,0) = 0.0; f(i,1) = 0.0; @@ -674,7 +674,7 @@ struct PairComputeFunctor { const int J_CONTRIB = ((NEIGHFLAG == HALF || NEIGHFLAG == HALFTHREAD) && j < c.nlocal); const E_FLOAT factor = J_CONTRIB?1.0:0.5; - if ((NEIGHFLAG == HALF || NEIGHFLAG == HALFTHREAD) && j < c.nlocal) { + if (J_CONTRIB) { a_f(j,0) -= fx; a_f(j,1) -= fy; a_f(j,2) -= fz; @@ -746,8 +746,10 @@ struct PairComputeFunctor { a_f(i,1) += fev.f[1]; a_f(i,2) += fev.f[2]; - if (c.eflag_global) + if (c.eflag_global) { ev.evdwl += fev.evdwl; + ev.ecoul += fev.ecoul; + } if (c.vflag_global) { ev.v[0] += fev.v[0]; @@ -761,7 +763,7 @@ struct PairComputeFunctor { if (NEIGHFLAG == FULL) { if (c.eflag_atom) - a_eatom(i) += fev.evdwl; + a_eatom(i) += fev.evdwl + fev.ecoul; if (c.vflag_atom) { a_vatom(i,0) += fev.v[0]; @@ -948,9 +950,9 @@ EV_FLOAT pair_compute_neighlist (PairStyle* fpair, std::enable_if_t<(NEIGHFLAG&P static int vectorsize = 0; static int atoms_per_team = 0; - static int lastcall = -1; #if defined(LMP_KOKKOS_GPU) + static int lastcall = -1; if (!vectorsize || lastcall < fpair->lmp->neighbor->lastcall) { lastcall = 
fpair->lmp->update->ntimestep; vectorsize = GetMaxNeighs(list); diff --git a/src/KOKKOS/pair_lj_charmm_coul_long_kokkos.cpp b/src/KOKKOS/pair_lj_charmm_coul_long_kokkos.cpp index 4caab0ef55..c7e10d39ef 100644 --- a/src/KOKKOS/pair_lj_charmm_coul_long_kokkos.cpp +++ b/src/KOKKOS/pair_lj_charmm_coul_long_kokkos.cpp @@ -214,9 +214,7 @@ compute_evdwl(const F_FLOAT& rsq, const int& /*i*/, const int& /*j*/, (cut_ljsq + 2.0*rsq - 3.0*cut_lj_innersq) / denom_lj; englj *= switch1; } - return englj; - } /* ---------------------------------------------------------------------- @@ -488,4 +486,3 @@ template class PairLJCharmmCoulLongKokkos; template class PairLJCharmmCoulLongKokkos; #endif } - diff --git a/src/KOKKOS/pair_lj_charmmfsw_coul_long_kokkos.cpp b/src/KOKKOS/pair_lj_charmmfsw_coul_long_kokkos.cpp new file mode 100644 index 0000000000..f412721411 --- /dev/null +++ b/src/KOKKOS/pair_lj_charmmfsw_coul_long_kokkos.cpp @@ -0,0 +1,497 @@ +// clang-format off +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + https://www.lammps.org/, Sandia National Laboratories + LAMMPS development team: developers@lammps.org + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Mitch Murphy (alphataubio) + + Based on serial kspace lj-fsw sections (force-switched) provided by + Robert Meissner and Lucio Colombi Ciacchi of Bremen University, Germany, + with additional assistance from Robert A. 
Latour, Clemson University + + ------------------------------------------------------------------------- */ + +#include "pair_lj_charmmfsw_coul_long_kokkos.h" + +#include "atom_kokkos.h" +#include "atom_masks.h" +#include "error.h" +#include "force.h" +#include "kokkos.h" +#include "memory_kokkos.h" +#include "neigh_list.h" +#include "neigh_request.h" +#include "neighbor.h" +#include "respa.h" +#include "update.h" + +#include +#include + +using namespace LAMMPS_NS; + + +#define EWALD_F 1.12837917 +#define EWALD_P 0.3275911 +#define A1 0.254829592 +#define A2 -0.284496736 +#define A3 1.421413741 +#define A4 -1.453152027 +#define A5 1.061405429 + +/* ---------------------------------------------------------------------- */ + +template +PairLJCharmmfswCoulLongKokkos::PairLJCharmmfswCoulLongKokkos(LAMMPS *lmp):PairLJCharmmfswCoulLong(lmp) +{ + respa_enable = 0; + + kokkosable = 1; + atomKK = (AtomKokkos *) atom; + execution_space = ExecutionSpaceFromDevice::space; + datamask_read = X_MASK | F_MASK | TYPE_MASK | Q_MASK | ENERGY_MASK | VIRIAL_MASK; + datamask_modify = F_MASK | ENERGY_MASK | VIRIAL_MASK; +} + +/* ---------------------------------------------------------------------- */ + +template +PairLJCharmmfswCoulLongKokkos::~PairLJCharmmfswCoulLongKokkos() +{ + if (copymode) return; + + if (allocated) { + memoryKK->destroy_kokkos(k_eatom,eatom); + memoryKK->destroy_kokkos(k_vatom,vatom); + memoryKK->destroy_kokkos(k_cutsq,cutsq); + } +} + +/* ---------------------------------------------------------------------- */ + +template +void PairLJCharmmfswCoulLongKokkos::compute(int eflag_in, int vflag_in) +{ + eflag = eflag_in; + vflag = vflag_in; + + if (neighflag == FULL) no_virial_fdotr_compute = 1; + + ev_init(eflag,vflag,0); + + // reallocate per-atom arrays if necessary + + if (eflag_atom) { + memoryKK->destroy_kokkos(k_eatom,eatom); + memoryKK->create_kokkos(k_eatom,eatom,maxeatom,"pair:eatom"); + d_eatom = k_eatom.view(); + } + if (vflag_atom) { + 
memoryKK->destroy_kokkos(k_vatom,vatom); + memoryKK->create_kokkos(k_vatom,vatom,maxvatom,"pair:vatom"); + d_vatom = k_vatom.view(); + } + + atomKK->sync(execution_space,datamask_read); + k_cutsq.template sync(); + k_params.template sync(); + if (eflag || vflag) atomKK->modified(execution_space,datamask_modify); + else atomKK->modified(execution_space,F_MASK); + + x = atomKK->k_x.view(); + c_x = atomKK->k_x.view(); + f = atomKK->k_f.view(); + q = atomKK->k_q.view(); + type = atomKK->k_type.view(); + nlocal = atom->nlocal; + nall = atom->nlocal + atom->nghost; + special_lj[0] = force->special_lj[0]; + special_lj[1] = force->special_lj[1]; + special_lj[2] = force->special_lj[2]; + special_lj[3] = force->special_lj[3]; + special_coul[0] = force->special_coul[0]; + special_coul[1] = force->special_coul[1]; + special_coul[2] = force->special_coul[2]; + special_coul[3] = force->special_coul[3]; + qqrd2e = force->qqrd2e; + newton_pair = force->newton_pair; + + // loop over neighbors of my atoms + + copymode = 1; + + EV_FLOAT ev; + if (ncoultablebits) + ev = pair_compute,CoulLongTable<1> > + (this,(NeighListKokkos*)list); + else + ev = pair_compute,CoulLongTable<0> > + (this,(NeighListKokkos*)list); + + + if (eflag) { + eng_vdwl += ev.evdwl; + eng_coul += ev.ecoul; + } + if (vflag_global) { + virial[0] += ev.v[0]; + virial[1] += ev.v[1]; + virial[2] += ev.v[2]; + virial[3] += ev.v[3]; + virial[4] += ev.v[4]; + virial[5] += ev.v[5]; + } + + if (eflag_atom) { + k_eatom.template modify(); + k_eatom.template sync(); + } + + if (vflag_atom) { + k_vatom.template modify(); + k_vatom.template sync(); + } + + if (vflag_fdotr) pair_virial_fdotr_compute(this); + + copymode = 0; +} + +/* ---------------------------------------------------------------------- + compute LJ CHARMM pair force between atoms i and j + ---------------------------------------------------------------------- */ +template +template +KOKKOS_INLINE_FUNCTION +F_FLOAT PairLJCharmmfswCoulLongKokkos:: 
+compute_fpair(const F_FLOAT& rsq, const int& /*i*/, const int& /*j*/, + const int& itype, const int& jtype) const { + const F_FLOAT r2inv = 1.0/rsq; + const F_FLOAT r6inv = r2inv*r2inv*r2inv; + F_FLOAT forcelj, switch1; + + forcelj = r6inv * + ((STACKPARAMS?m_params[itype][jtype].lj1:params(itype,jtype).lj1)*r6inv - + (STACKPARAMS?m_params[itype][jtype].lj2:params(itype,jtype).lj2)); + + if (rsq > cut_lj_innersq) { + switch1 = (cut_ljsq-rsq) * (cut_ljsq-rsq) * + (cut_ljsq + 2.0*rsq - 3.0*cut_lj_innersq) / denom_lj; + forcelj = forcelj*switch1; + } + + return forcelj*r2inv; +} + +/* ---------------------------------------------------------------------- + compute LJ CHARMM pair potential energy between atoms i and j + ---------------------------------------------------------------------- */ +template +template +KOKKOS_INLINE_FUNCTION +F_FLOAT PairLJCharmmfswCoulLongKokkos:: +compute_evdwl(const F_FLOAT& rsq, const int& /*i*/, const int& /*j*/, + const int& itype, const int& jtype) const { + const F_FLOAT r2inv = 1.0/rsq; + const F_FLOAT r6inv = r2inv*r2inv*r2inv; + const F_FLOAT r = sqrt(rsq); + const F_FLOAT rinv = 1.0/r; + const F_FLOAT r3inv = rinv*rinv*rinv; + F_FLOAT englj, englj12, englj6; + + if (rsq > cut_lj_innersq) { + englj12 = (STACKPARAMS?m_params[itype][jtype].lj3:params(itype,jtype).lj3)*cut_lj6* + denom_lj12 * (r6inv - cut_lj6inv)*(r6inv - cut_lj6inv); + englj6 = -(STACKPARAMS?m_params[itype][jtype].lj4:params(itype,jtype).lj4)* + cut_lj3*denom_lj6 * (r3inv - cut_lj3inv)*(r3inv - cut_lj3inv); + englj = englj12 + englj6; + } else { + englj12 = r6inv*(STACKPARAMS?m_params[itype][jtype].lj3:params(itype,jtype).lj3)*r6inv - + (STACKPARAMS?m_params[itype][jtype].lj3:params(itype,jtype).lj3)*cut_lj_inner6inv*cut_lj6inv; + englj6 = -(STACKPARAMS?m_params[itype][jtype].lj4:params(itype,jtype).lj4)*r6inv + + (STACKPARAMS?m_params[itype][jtype].lj4:params(itype,jtype).lj4)* + cut_lj_inner3inv*cut_lj3inv; + englj = englj12 + englj6; + } + return englj; +} + +/* 
---------------------------------------------------------------------- + compute coulomb pair force between atoms i and j + ---------------------------------------------------------------------- */ +template +template +KOKKOS_INLINE_FUNCTION +F_FLOAT PairLJCharmmfswCoulLongKokkos:: +compute_fcoul(const F_FLOAT& rsq, const int& /*i*/, const int&j, + const int& /*itype*/, const int& /*jtype*/, + const F_FLOAT& factor_coul, const F_FLOAT& qtmp) const { + if (Specialisation::DoTable && rsq > tabinnersq) { + union_int_float_t rsq_lookup; + rsq_lookup.f = rsq; + const int itable = (rsq_lookup.i & ncoulmask) >> ncoulshiftbits; + const F_FLOAT fraction = (rsq_lookup.f - d_rtable[itable]) * d_drtable[itable]; + const F_FLOAT table = d_ftable[itable] + fraction*d_dftable[itable]; + F_FLOAT forcecoul = qtmp*q[j] * table; + if (factor_coul < 1.0) { + const F_FLOAT table = d_ctable[itable] + fraction*d_dctable[itable]; + const F_FLOAT prefactor = qtmp*q[j] * table; + forcecoul -= (1.0-factor_coul)*prefactor; + } + return forcecoul/rsq; + } else { + const F_FLOAT r = sqrt(rsq); + const F_FLOAT grij = g_ewald * r; + const F_FLOAT expm2 = exp(-grij*grij); + const F_FLOAT t = 1.0 / (1.0 + EWALD_P*grij); + const F_FLOAT rinv = 1.0/r; + const F_FLOAT erfc = t * (A1+t*(A2+t*(A3+t*(A4+t*A5)))) * expm2; + const F_FLOAT prefactor = qqrd2e * qtmp*q[j]*rinv; + F_FLOAT forcecoul = prefactor * (erfc + EWALD_F*grij*expm2); + if (factor_coul < 1.0) forcecoul -= (1.0-factor_coul)*prefactor; + + return forcecoul*rinv*rinv; + } +} + +/* ---------------------------------------------------------------------- + compute coulomb pair potential energy between atoms i and j + ---------------------------------------------------------------------- */ +template +template +KOKKOS_INLINE_FUNCTION +F_FLOAT PairLJCharmmfswCoulLongKokkos:: +compute_ecoul(const F_FLOAT& rsq, const int& /*i*/, const int&j, + const int& /*itype*/, const int& /*jtype*/, const F_FLOAT& factor_coul, const F_FLOAT& qtmp) const { + if 
(Specialisation::DoTable && rsq > tabinnersq) { + union_int_float_t rsq_lookup; + rsq_lookup.f = rsq; + const int itable = (rsq_lookup.i & ncoulmask) >> ncoulshiftbits; + const F_FLOAT fraction = (rsq_lookup.f - d_rtable[itable]) * d_drtable[itable]; + const F_FLOAT table = d_etable[itable] + fraction*d_detable[itable]; + F_FLOAT ecoul = qtmp*q[j] * table; + if (factor_coul < 1.0) { + const F_FLOAT table = d_ctable[itable] + fraction*d_dctable[itable]; + const F_FLOAT prefactor = qtmp*q[j] * table; + ecoul -= (1.0-factor_coul)*prefactor; + } + return ecoul; + } else { + const F_FLOAT r = sqrt(rsq); + const F_FLOAT grij = g_ewald * r; + const F_FLOAT expm2 = exp(-grij*grij); + const F_FLOAT t = 1.0 / (1.0 + EWALD_P*grij); + const F_FLOAT erfc = t * (A1+t*(A2+t*(A3+t*(A4+t*A5)))) * expm2; + const F_FLOAT prefactor = qqrd2e * qtmp*q[j]/r; + F_FLOAT ecoul = prefactor * erfc; + if (factor_coul < 1.0) ecoul -= (1.0-factor_coul)*prefactor; + return ecoul; + } +} + +/* ---------------------------------------------------------------------- + allocate all arrays +------------------------------------------------------------------------- */ + +template +void PairLJCharmmfswCoulLongKokkos::allocate() /* Runs the base-class allocate(), then re-creates cutsq through memoryKK->create_kokkos() together with k_cutsq and sizes d_cut_ljsq, d_cut_coulsq and k_params as (ntypes+1) x (ntypes+1). NOTE(review): template argument lists (after "template", "view", "DualView") appear to have been lost in extraction - confirm against the upstream file. */ +{ + PairLJCharmmfswCoulLong::allocate(); + + int n = atom->ntypes; + + memory->destroy(cutsq); /* drop the array made by the base class before create_kokkos() assigns cutsq again */ + memoryKK->create_kokkos(k_cutsq,cutsq,n+1,n+1,"pair:cutsq"); + d_cutsq = k_cutsq.template view(); + + d_cut_ljsq = typename AT::t_ffloat_2d("pair:cut_ljsq",n+1,n+1); + + d_cut_coulsq = typename AT::t_ffloat_2d("pair:cut_coulsq",n+1,n+1); + + k_params = Kokkos::DualView("PairLJCharmmfswCoulLong::params",n+1,n+1); + params = k_params.template view(); +} + +template +void PairLJCharmmfswCoulLongKokkos::init_tables(double cut_coul, double *cut_respa) /* Calls Pair::init_tables(cut_coul,cut_respa), then mirrors the eight Coulomb tables (rtable, drtable, ftable, dftable, ctable, dctable, etable, detable) into the matching d_*table members: each one is staged element by element in a temporary view labelled "HostTable", deep-copied into a fresh view labelled "DeviceTable", and that view is assigned to the member. */ +{ + Pair::init_tables(cut_coul,cut_respa); + + typedef typename ArrayTypes::t_ffloat_1d table_type; + typedef typename ArrayTypes::t_ffloat_1d host_table_type; + + int ntable = 1; + for (int i = 0; i < ncoultablebits; i++) ntable *= 2; /* ntable = 2^ncoultablebits entries per table */ + + + 
// Copy rtable and drtable + { + host_table_type h_table("HostTable",ntable); + table_type d_table("DeviceTable",ntable); + for (int i = 0; i < ntable; i++) { + h_table(i) = rtable[i]; + } + Kokkos::deep_copy(d_table,h_table); + d_rtable = d_table; + } + + { + host_table_type h_table("HostTable",ntable); + table_type d_table("DeviceTable",ntable); + for (int i = 0; i < ntable; i++) { + h_table(i) = drtable[i]; + } + Kokkos::deep_copy(d_table,h_table); + d_drtable = d_table; + } + + { + host_table_type h_table("HostTable",ntable); + table_type d_table("DeviceTable",ntable); + + // Copy ftable and dftable + for (int i = 0; i < ntable; i++) { + h_table(i) = ftable[i]; + } + Kokkos::deep_copy(d_table,h_table); + d_ftable = d_table; + } + + { + host_table_type h_table("HostTable",ntable); + table_type d_table("DeviceTable",ntable); + + for (int i = 0; i < ntable; i++) { + h_table(i) = dftable[i]; + } + Kokkos::deep_copy(d_table,h_table); + d_dftable = d_table; + } + + { + host_table_type h_table("HostTable",ntable); + table_type d_table("DeviceTable",ntable); + + // Copy ctable and dctable + for (int i = 0; i < ntable; i++) { + h_table(i) = ctable[i]; + } + Kokkos::deep_copy(d_table,h_table); + d_ctable = d_table; + } + + { + host_table_type h_table("HostTable",ntable); + table_type d_table("DeviceTable",ntable); + + for (int i = 0; i < ntable; i++) { + h_table(i) = dctable[i]; + } + Kokkos::deep_copy(d_table,h_table); + d_dctable = d_table; + } + + { + host_table_type h_table("HostTable",ntable); + table_type d_table("DeviceTable",ntable); + + // Copy etable and detable + for (int i = 0; i < ntable; i++) { + h_table(i) = etable[i]; + } + Kokkos::deep_copy(d_table,h_table); + d_etable = d_table; + } + + { + host_table_type h_table("HostTable",ntable); + table_type d_table("DeviceTable",ntable); + + for (int i = 0; i < ntable; i++) { + h_table(i) = detable[i]; + } + Kokkos::deep_copy(d_table,h_table); + d_detable = d_table; + } +} + +/* 
---------------------------------------------------------------------- + init specific to this pair style +------------------------------------------------------------------------- */ + +template +void PairLJCharmmfswCoulLongKokkos::init_style() /* Runs the base-class init_style(), deep-copies cut_ljsq and cut_coulsq into d_cut_ljsq and d_cut_coulsq, calls error->all() when update->whichflag == 1, the integrate style matches "^respa" and level_inner or level_middle is >= 0, and finally configures this style's neighbor list request from lmp->kokkos->neighflag. */ +{ + PairLJCharmmfswCoulLong::init_style(); + + Kokkos::deep_copy(d_cut_ljsq,cut_ljsq); + Kokkos::deep_copy(d_cut_coulsq,cut_coulsq); + + // error if rRESPA with inner levels + + if (update->whichflag == 1 && utils::strmatch(update->integrate_style,"^respa")) { + int respa = 0; + if (((Respa *) update->integrate)->level_inner >= 0) respa = 1; + if (((Respa *) update->integrate)->level_middle >= 0) respa = 2; + if (respa) + error->all(FLERR,"Cannot use Kokkos pair style with rRESPA inner/middle"); + } + + // adjust neighbor list request for KOKKOS + + neighflag = lmp->kokkos->neighflag; + auto request = neighbor->find_request(this); + request->set_kokkos_host(std::is_same_v && + !std::is_same_v); + request->set_kokkos_device(std::is_same_v); + if (neighflag == FULL) request->enable_full(); /* a full neighbor list is requested only when neighflag == FULL */ +} + +/* ---------------------------------------------------------------------- + init for one type pair i,j and corresponding j,i +------------------------------------------------------------------------- */ + +template +double PairLJCharmmfswCoulLongKokkos::init_one(int i, int j) /* Takes the pair cutoff from the base-class init_one(i,j), stores lj1..lj4 plus cut_ljsq and cut_coulsq in k_params.h_view(i,j), copies that entry to (j,i), and returns the base-class cutoff unchanged. */ +{ + double cutone = PairLJCharmmfswCoulLong::init_one(i,j); + + k_params.h_view(i,j).lj1 = lj1[i][j]; + k_params.h_view(i,j).lj2 = lj2[i][j]; + k_params.h_view(i,j).lj3 = lj3[i][j]; + k_params.h_view(i,j).lj4 = lj4[i][j]; + //k_params.h_view(i,j).offset = offset[i][j]; + k_params.h_view(i,j).cut_ljsq = cut_ljsq; + k_params.h_view(i,j).cut_coulsq = cut_coulsq; + + k_params.h_view(j,i) = k_params.h_view(i,j); /* NOTE(review): the statement that follows looks truncated - the text from the first "less than" sign up to a later "greater than" sign seems to have been lost in extraction, so the condition and body of this if are not visible here; restore from the upstream file. */ + if (i(); + k_params.template modify(); + + return cutone; +} + +namespace LAMMPS_NS { /* explicit instantiations of the class template; the second one is compiled only when LMP_KOKKOS_GPU is defined (template arguments are not visible in this text) */ +template class PairLJCharmmfswCoulLongKokkos; +#ifdef LMP_KOKKOS_GPU +template class PairLJCharmmfswCoulLongKokkos; +#endif +} diff --git 
a/src/KOKKOS/pair_lj_charmmfsw_coul_long_kokkos.h b/src/KOKKOS/pair_lj_charmmfsw_coul_long_kokkos.h new file mode 100644 index 0000000000..7533f40dbc --- /dev/null +++ b/src/KOKKOS/pair_lj_charmmfsw_coul_long_kokkos.h @@ -0,0 +1,145 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + https://www.lammps.org/, Sandia National Laboratories + LAMMPS development team: developers@lammps.org + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +#ifdef PAIR_CLASS +// clang-format off +PairStyle(lj/charmmfsw/coul/long/kk,PairLJCharmmfswCoulLongKokkos); +PairStyle(lj/charmmfsw/coul/long/kk/device,PairLJCharmmfswCoulLongKokkos); +PairStyle(lj/charmmfsw/coul/long/kk/host,PairLJCharmmfswCoulLongKokkos); +// clang-format on +#else + +// clang-format off +#ifndef LMP_PAIR_LJ_CHARMMFSW_COUL_LONG_KOKKOS_H +#define LMP_PAIR_LJ_CHARMMFSW_COUL_LONG_KOKKOS_H + +#include "pair_kokkos.h" +#include "pair_lj_charmmfsw_coul_long.h" +#include "neigh_list_kokkos.h" + +namespace LAMMPS_NS { + +template +class PairLJCharmmfswCoulLongKokkos : public PairLJCharmmfswCoulLong { /* Kokkos variant of PairLJCharmmfswCoulLong: derives from it and overrides compute(), init_tables(), init_style(), init_one() and allocate(). Registered above under the style names lj/charmmfsw/coul/long/kk, .../kk/device and .../kk/host. NOTE(review): template parameter and argument lists appear to be missing throughout this text (e.g. after "template", "ArrayTypes", "DualView", "PairComputeFunctor"), presumably lost in extraction - confirm against the upstream header. */ + public: + enum {EnabledNeighFlags=FULL|HALFTHREAD|HALF}; /* bit-wise OR of the three neighbor list flags FULL, HALFTHREAD and HALF */ + enum {COUL_FLAG=1}; + typedef DeviceType device_type; + typedef ArrayTypes AT; + PairLJCharmmfswCoulLongKokkos(class LAMMPS *); + ~PairLJCharmmfswCoulLongKokkos() override; + + void compute(int, int) override; + + void init_tables(double cut_coul, double *cut_respa) override; + void init_style() override; + double init_one(int, int) override; + + protected: + template + KOKKOS_INLINE_FUNCTION + F_FLOAT compute_fpair(const F_FLOAT& rsq, const 
int& i, const int&j, + const int& itype, const int& jtype) const; + + template + KOKKOS_INLINE_FUNCTION + F_FLOAT compute_fcoul(const F_FLOAT& rsq, const int& i, const int&j, const int& itype, + const int& jtype, const F_FLOAT& factor_coul, const F_FLOAT& qtmp) const; + + template + KOKKOS_INLINE_FUNCTION + F_FLOAT compute_evdwl(const F_FLOAT& rsq, const int& i, const int&j, + const int& itype, const int& jtype) const; + + template + KOKKOS_INLINE_FUNCTION + F_FLOAT compute_ecoul(const F_FLOAT& rsq, const int& i, const int&j, + const int& itype, const int& jtype, const F_FLOAT& factor_coul, const F_FLOAT& qtmp) const; /* Coulomb pair energy; the definition in the .cpp uses the tabulated form when Specialisation::DoTable is set and rsq > tabinnersq, and an erfc-based expression otherwise */ + + Kokkos::DualView k_params; + typename Kokkos::DualView::t_dev_const_um params; + // hardwired to space for 12 atom types + params_lj_coul m_params[MAX_TYPES_STACKPARAMS+1][MAX_TYPES_STACKPARAMS+1]; + + F_FLOAT m_cutsq[MAX_TYPES_STACKPARAMS+1][MAX_TYPES_STACKPARAMS+1]; + F_FLOAT m_cut_ljsq[MAX_TYPES_STACKPARAMS+1][MAX_TYPES_STACKPARAMS+1]; + F_FLOAT m_cut_coulsq[MAX_TYPES_STACKPARAMS+1][MAX_TYPES_STACKPARAMS+1]; + typename AT::t_x_array_randomread x; + typename AT::t_x_array c_x; + typename AT::t_f_array f; + typename AT::t_int_1d_randomread type; + typename AT::t_float_1d_randomread q; + + DAT::tdual_efloat_1d k_eatom; + DAT::tdual_virial_array k_vatom; + typename AT::t_efloat_1d d_eatom; + typename AT::t_virial_array d_vatom; + + int newton_pair; + + typename AT::tdual_ffloat_2d k_cutsq; + typename AT::t_ffloat_2d d_cutsq; + typename AT::t_ffloat_2d d_cut_ljsq; + typename AT::t_ffloat_2d d_cut_coulsq; + + typename AT::t_ffloat_1d_randomread + d_rtable, d_drtable, d_ftable, d_dftable, + d_ctable, d_dctable, d_etable, d_detable; /* views holding copies of the Coulomb lookup tables; assigned in init_tables() */ + + int neighflag; + int nlocal,nall,eflag,vflag; + + double special_coul[4]; + double special_lj[4]; + double qqrd2e; + + void allocate() override; + + friend struct PairComputeFunctor>; + friend struct PairComputeFunctor>; + friend struct PairComputeFunctor>; + friend struct PairComputeFunctor>; + friend struct 
PairComputeFunctor>; + friend struct PairComputeFunctor>; + friend struct PairComputeFunctor>; + friend struct PairComputeFunctor>; + friend EV_FLOAT pair_compute_neighlist>(PairLJCharmmfswCoulLongKokkos*,NeighListKokkos*); + friend EV_FLOAT pair_compute_neighlist>(PairLJCharmmfswCoulLongKokkos*,NeighListKokkos*); + friend EV_FLOAT pair_compute_neighlist>(PairLJCharmmfswCoulLongKokkos*,NeighListKokkos*); + friend EV_FLOAT pair_compute_neighlist>(PairLJCharmmfswCoulLongKokkos*,NeighListKokkos*); + friend EV_FLOAT pair_compute>(PairLJCharmmfswCoulLongKokkos*, + NeighListKokkos*); + friend struct PairComputeFunctor>; + friend struct PairComputeFunctor>; + friend struct PairComputeFunctor>; + friend struct PairComputeFunctor>; + friend struct PairComputeFunctor>; + friend struct PairComputeFunctor>; + friend struct PairComputeFunctor>; + friend struct PairComputeFunctor>; + friend EV_FLOAT pair_compute_neighlist>(PairLJCharmmfswCoulLongKokkos*,NeighListKokkos*); + friend EV_FLOAT pair_compute_neighlist>(PairLJCharmmfswCoulLongKokkos*,NeighListKokkos*); + friend EV_FLOAT pair_compute_neighlist>(PairLJCharmmfswCoulLongKokkos*,NeighListKokkos*); + friend EV_FLOAT pair_compute_neighlist>(PairLJCharmmfswCoulLongKokkos*,NeighListKokkos*); + friend EV_FLOAT pair_compute>(PairLJCharmmfswCoulLongKokkos*, + NeighListKokkos*); + friend void pair_virial_fdotr_compute(PairLJCharmmfswCoulLongKokkos*); /* the friend declarations above give PairComputeFunctor, pair_compute_neighlist, pair_compute and pair_virial_fdotr_compute access to the protected members of this class */ + +}; + +} + +#endif +#endif + diff --git a/src/KOKKOS/pair_pace_extrapolation_kokkos.cpp b/src/KOKKOS/pair_pace_extrapolation_kokkos.cpp index 0980ad776d..ef747ef95c 100644 --- a/src/KOKKOS/pair_pace_extrapolation_kokkos.cpp +++ b/src/KOKKOS/pair_pace_extrapolation_kokkos.cpp @@ -106,7 +106,8 @@ void PairPACEExtrapolationKokkos::grow(int natom, int maxneigh) if ((int)A.extent(0) < natom) { - MemKK::realloc_kokkos(A, "pace:A", natom, nelements, nradmax + 1, (lmax + 1) * (lmax + 1)); + MemKK::realloc_kokkos(A_sph, "pace:A_sph", natom, nelements, idx_sph_max, nradmax + 1); + 
MemKK::realloc_kokkos(A, "pace:A", natom, nelements, (lmax + 1) * (lmax + 1), nradmax + 1); MemKK::realloc_kokkos(A_rank1, "pace:A_rank1", natom, nelements, nradbase); MemKK::realloc_kokkos(A_list, "pace:A_list", natom, idx_ms_combs_max, basis_set->rankmax); @@ -117,7 +118,7 @@ void PairPACEExtrapolationKokkos::grow(int natom, int maxneigh) MemKK::realloc_kokkos(rhos, "pace:rhos", natom, basis_set->ndensitymax + 1); // +1 density for core repulsion MemKK::realloc_kokkos(dF_drho, "pace:dF_drho", natom, basis_set->ndensitymax + 1); // +1 density for core repulsion - MemKK::realloc_kokkos(weights, "pace:weights", natom, nelements, nradmax + 1, (lmax + 1) * (lmax + 1)); + MemKK::realloc_kokkos(weights, "pace:weights", natom, nelements, idx_sph_max, nradmax + 1); MemKK::realloc_kokkos(weights_rank1, "pace:weights_rank1", natom, nelements, nradbase); // hard-core repulsion @@ -130,16 +131,16 @@ void PairPACEExtrapolationKokkos::grow(int natom, int maxneigh) MemKK::realloc_kokkos(dB_flatten, "pace:dB_flatten", natom, idx_ms_combs_max, basis_set->rankmax); - //B-projections + // B-projections MemKK::realloc_kokkos(projections, "pace:projections", natom, total_num_functions_max); // per-atom B-projections MemKK::realloc_kokkos(d_gamma, "pace:gamma", natom); // per-atom gamma } - if (((int)ylm.extent(0) < natom) || ((int)ylm.extent(1) < maxneigh)) { + if (((int)fr.extent(0) < natom) || ((int)fr.extent(1) < maxneigh)) { // radial functions - MemKK::realloc_kokkos(fr, "pace:fr", natom, maxneigh, nradmax, lmax + 1); - MemKK::realloc_kokkos(dfr, "pace:dfr", natom, maxneigh, nradmax, lmax + 1); + MemKK::realloc_kokkos(fr, "pace:fr", natom, maxneigh, lmax + 1, nradmax); + MemKK::realloc_kokkos(dfr, "pace:dfr", natom, maxneigh, lmax + 1, nradmax); MemKK::realloc_kokkos(gr, "pace:gr", natom, maxneigh, nradbase); MemKK::realloc_kokkos(dgr, "pace:dgr", natom, maxneigh, nradbase); const int max_num_functions = MAX(nradbase, nradmax*(lmax + 1)); @@ -150,12 +151,6 @@ void 
PairPACEExtrapolationKokkos::grow(int natom, int maxneigh) MemKK::realloc_kokkos(cr, "pace:cr", natom, maxneigh); MemKK::realloc_kokkos(dcr, "pace:dcr", natom, maxneigh); - // spherical harmonics - MemKK::realloc_kokkos(plm, "pace:plm", natom, maxneigh, (lmax + 1) * (lmax + 1)); - MemKK::realloc_kokkos(dplm, "pace:dplm", natom, maxneigh, (lmax + 1) * (lmax + 1)); - MemKK::realloc_kokkos(ylm, "pace:ylm", natom, maxneigh, (lmax + 1) * (lmax + 1)); - MemKK::realloc_kokkos(dylm, "pace:dylm", natom, maxneigh, (lmax + 1) * (lmax + 1)); - // short neigh list MemKK::realloc_kokkos(d_ncount, "pace:ncount", natom); MemKK::realloc_kokkos(d_mu, "pace:mu", natom, maxneigh); @@ -224,7 +219,6 @@ void PairPACEExtrapolationKokkos::copy_pertype() Kokkos::deep_copy(d_wpre, h_wpre); Kokkos::deep_copy(d_mexp, h_mexp); - // ZBL core-rep MemKK::realloc_kokkos(d_cut_in, "pace:d_cut_in", nelements, nelements); MemKK::realloc_kokkos(d_dcut_in, "pace:d_dcut_in", nelements, nelements); @@ -266,6 +260,9 @@ void PairPACEExtrapolationKokkos::copy_splines() ACERadialFunctions* radial_functions = dynamic_cast(basis_set->radial_functions); + if (radial_functions == nullptr) + error->all(FLERR,"Chosen radial basis style not supported by pair style pace/kk"); + for (int i = 0; i < nelements; i++) { for (int j = 0; j < nelements; j++) { k_splines_gk.h_view(i, j) = radial_functions->splines_gk(i, j); @@ -297,8 +294,9 @@ void PairPACEExtrapolationKokkos::copy_tilde() total_num_functions_max = 0; MemKK::realloc_kokkos(d_idx_ms_combs_count, "pace:idx_ms_combs_count", nelements); - MemKK::realloc_kokkos(d_total_basis_size, "pace:total_basis_size", nelements); auto h_idx_ms_combs_count = Kokkos::create_mirror_view(d_idx_ms_combs_count); + + MemKK::realloc_kokkos(d_total_basis_size, "pace:total_basis_size", nelements); auto h_total_basis_size = Kokkos::create_mirror_view(d_total_basis_size); for (int mu = 0; mu < nelements; mu++) { @@ -313,8 +311,8 @@ void PairPACEExtrapolationKokkos::copy_tilde() 
idx_ms_combs++; // rank > 1 - for (int func_ind = 0; func_ind < total_basis_size; ++func_ind) { - ACEBBasisFunction *func = &basis[func_ind]; + for (int idx_func = 0; idx_func < total_basis_size; ++idx_func) { + ACEBBasisFunction *func = &basis[idx_func]; // loop over {ms} combinations in sum for (int ms_ind = 0; ms_ind < func->num_ms_combs; ++ms_ind) @@ -331,7 +329,7 @@ void PairPACEExtrapolationKokkos::copy_tilde() MemKK::realloc_kokkos(d_rank, "pace:rank", nelements, total_num_functions_max); MemKK::realloc_kokkos(d_num_ms_combs, "pace:num_ms_combs", nelements, total_num_functions_max); - MemKK::realloc_kokkos(d_func_inds, "pace:func_inds", nelements, idx_ms_combs_max); + MemKK::realloc_kokkos(d_idx_funcs, "pace:idx_funcs", nelements, idx_ms_combs_max); MemKK::realloc_kokkos(d_mus, "pace:mus", nelements, total_num_functions_max, basis_set->rankmax); MemKK::realloc_kokkos(d_ns, "pace:ns", nelements, total_num_functions_max, basis_set->rankmax); MemKK::realloc_kokkos(d_ls, "pace:ls", nelements, total_num_functions_max, basis_set->rankmax); @@ -344,7 +342,7 @@ void PairPACEExtrapolationKokkos::copy_tilde() auto h_rank = Kokkos::create_mirror_view(d_rank); auto h_num_ms_combs = Kokkos::create_mirror_view(d_num_ms_combs); - auto h_func_inds = Kokkos::create_mirror_view(d_func_inds); + auto h_idx_funcs = Kokkos::create_mirror_view(d_idx_funcs); auto h_mus = Kokkos::create_mirror_view(d_mus); auto h_ns = Kokkos::create_mirror_view(d_ns); auto h_ls = Kokkos::create_mirror_view(d_ls); @@ -365,55 +363,52 @@ void PairPACEExtrapolationKokkos::copy_tilde() const int ndensity = basis_set->map_embedding_specifications.at(mu).ndensity; - int idx_ms_comb = 0; + int idx_ms_combs = 0; // rank=1 - for (int func_ind = 0; func_ind < total_basis_size_rank1; ++func_ind) { - ACEBBasisFunction *func = &basis_rank1[func_ind]; - h_rank(mu, func_ind) = 1; - h_mus(mu, func_ind, 0) = func->mus[0]; - h_ns(mu, func_ind, 0) = func->ns[0]; + for (int idx_func = 0; idx_func < 
total_basis_size_rank1; ++idx_func) { + ACEBBasisFunction *func = &basis_rank1[idx_func]; + h_rank(mu, idx_func) = 1; + h_mus(mu, idx_func, 0) = func->mus[0]; + h_ns(mu, idx_func, 0) = func->ns[0]; for (int p = 0; p < ndensity; ++p) - h_coeffs(mu, func_ind, p) = func->coeff[p]; + h_coeffs(mu, idx_func, p) = func->coeff[p]; - h_gen_cgs(mu, idx_ms_comb) = func->gen_cgs[0]; + h_gen_cgs(mu, idx_ms_combs) = func->gen_cgs[0]; - h_func_inds(mu, idx_ms_comb) = func_ind; - idx_ms_comb++; + h_idx_funcs(mu, idx_ms_combs) = idx_func; + idx_ms_combs++; } // rank > 1 - for (int func_ind = 0; func_ind < total_basis_size; ++func_ind) { - ACEBBasisFunction *func = &basis[func_ind]; + for (int idx_func = 0; idx_func < total_basis_size; ++idx_func) { + ACEBBasisFunction *func = &basis[idx_func]; // TODO: check if func->ctildes are zero, then skip - const int func_ind_through = total_basis_size_rank1 + func_ind; + const int idx_func_through = total_basis_size_rank1 + idx_func; - const int rank = h_rank(mu, func_ind_through) = func->rank; - h_num_ms_combs(mu, func_ind_through) = func->num_ms_combs; + const int rank = h_rank(mu, idx_func_through) = func->rank; + h_num_ms_combs(mu, idx_func_through) = func->num_ms_combs; for (int t = 0; t < rank; t++) { - h_mus(mu, func_ind_through, t) = func->mus[t]; - h_ns(mu, func_ind_through, t) = func->ns[t]; - h_ls(mu, func_ind_through, t) = func->ls[t]; + h_mus(mu, idx_func_through, t) = func->mus[t]; + h_ns(mu, idx_func_through, t) = func->ns[t]; + h_ls(mu, idx_func_through, t) = func->ls[t]; } for (int p = 0; p < ndensity; ++p) - h_coeffs(mu, func_ind_through, p) = func->coeff[p]; - + h_coeffs(mu, idx_func_through, p) = func->coeff[p]; // loop over {ms} combinations in sum for (int ms_ind = 0; ms_ind < func->num_ms_combs; ++ms_ind) { auto ms = &func->ms_combs[ms_ind * rank]; // current ms-combination (of length = rank) for (int t = 0; t < rank; t++) - h_ms_combs(mu, idx_ms_comb, t) = ms[t]; + h_ms_combs(mu, idx_ms_combs, t) = ms[t]; + 
h_gen_cgs(mu, idx_ms_combs) = func->gen_cgs[ms_ind]; - h_gen_cgs(mu, idx_ms_comb) = func->gen_cgs[ms_ind]; - - - h_func_inds(mu, idx_ms_comb) = func_ind_through; - idx_ms_comb++; + h_idx_funcs(mu, idx_ms_combs) = idx_func_through; + idx_ms_combs++; } } @@ -427,7 +422,7 @@ void PairPACEExtrapolationKokkos::copy_tilde() Kokkos::deep_copy(d_rank, h_rank); Kokkos::deep_copy(d_num_ms_combs, h_num_ms_combs); - Kokkos::deep_copy(d_func_inds, h_func_inds); + Kokkos::deep_copy(d_idx_funcs, h_idx_funcs); Kokkos::deep_copy(d_mus, h_mus); Kokkos::deep_copy(d_ns, h_ns); Kokkos::deep_copy(d_ls, h_ls); @@ -477,6 +472,7 @@ void PairPACEExtrapolationKokkos::init_style() // spherical harmonics + MemKK::realloc_kokkos(d_idx_sph, "pace:idx_sph", (lmax + 1) * (lmax + 1)); MemKK::realloc_kokkos(alm, "pace:alm", (lmax + 1) * (lmax + 1)); MemKK::realloc_kokkos(blm, "pace:blm", (lmax + 1) * (lmax + 1)); MemKK::realloc_kokkos(cl, "pace:cl", lmax + 1); @@ -575,6 +571,7 @@ void PairPACEExtrapolationKokkos::compute(int eflag_in, int vflag_in atomKK->modified(Host,F_MASK); return; } + eflag = eflag_in; vflag = vflag_in; @@ -602,6 +599,7 @@ void PairPACEExtrapolationKokkos::compute(int eflag_in, int vflag_in //zeroify array memset(extrapolation_grade_gamma, 0, nmax * sizeof(*extrapolation_grade_gamma)); } + if (flag_corerep_factor && atom->nlocal > nmax_corerep) { memory->destroy(corerep_factor); nmax_corerep = atom->nlocal; @@ -647,7 +645,6 @@ void PairPACEExtrapolationKokkos::compute(int eflag_in, int vflag_in chunk_size = MIN(chunksize,inum); // "chunksize" variable is set by user chunk_offset = 0; - grow(chunk_size, maxneigh); EV_FLOAT ev; @@ -656,14 +653,12 @@ void PairPACEExtrapolationKokkos::compute(int eflag_in, int vflag_in Kokkos::deep_copy(weights, 0.0); Kokkos::deep_copy(weights_rank1, 0.0); - Kokkos::deep_copy(A, 0.0); + Kokkos::deep_copy(A_sph, 0.0); Kokkos::deep_copy(A_rank1, 0.0); Kokkos::deep_copy(rhos, 0.0); - Kokkos::deep_copy(rho_core, 0.0); Kokkos::deep_copy(d_d_min, 
PairPACEExtrapolation::aceimpl->basis_set->cutoffmax); Kokkos::deep_copy(d_jj_min, -1); - Kokkos::deep_copy(projections, 0.0); Kokkos::deep_copy(d_gamma, 0.0); Kokkos::deep_copy(d_corerep, 0.0); @@ -693,15 +688,6 @@ void PairPACEExtrapolationKokkos::compute(int eflag_in, int vflag_in Kokkos::parallel_for("ComputeRadial",policy_radial,*this); } - //ComputeYlm - { - int vector_length = vector_length_default; - int team_size = 16; - check_team_size_for(((chunk_size+team_size-1)/team_size)*maxneigh,team_size,vector_length); - typename Kokkos::TeamPolicy policy_ylm(((chunk_size+team_size-1)/team_size)*maxneigh,team_size,vector_length); - Kokkos::parallel_for("ComputeYlm",policy_ylm,*this); - } - //ComputeAi { int vector_length = vector_length_default; @@ -737,7 +723,7 @@ void PairPACEExtrapolationKokkos::compute(int eflag_in, int vflag_in //ComputeWeights { - typename Kokkos::RangePolicy policy_weights(0, chunk_size * idx_ms_combs_max); + typename Kokkos::RangePolicy policy_weights(0,chunk_size * idx_ms_combs_max); Kokkos::parallel_for("ComputeWeights",policy_weights,*this); } @@ -746,7 +732,7 @@ void PairPACEExtrapolationKokkos::compute(int eflag_in, int vflag_in int vector_length = vector_length_default; int team_size = team_size_default; check_team_size_for(((chunk_size+team_size-1)/team_size)*maxneigh,team_size,vector_length); - typename Kokkos::TeamPolicy policy_derivative(((chunk_size+team_size-1)/team_size)*maxneigh,team_size,vector_length); + typename Kokkos::TeamPolicy policy_derivative(((chunk_size+team_size-1)/team_size)*maxneigh,team_size,vector_length); Kokkos::parallel_for("ComputeDerivative",policy_derivative,*this); } @@ -772,16 +758,18 @@ void PairPACEExtrapolationKokkos::compute(int eflag_in, int vflag_in } ev += ev_tmp; - //if flag_compute_extrapolation_grade - copy current d_gamma to extrapolation_grade_gamma + // if flag_compute_extrapolation_grade - copy current d_gamma to extrapolation_grade_gamma + if (flag_compute_extrapolation_grade){ h_gamma = 
Kokkos::create_mirror_view(d_gamma); Kokkos::deep_copy(h_gamma, d_gamma); memcpy(extrapolation_grade_gamma+chunk_offset, (void *) h_gamma.data(), sizeof(double)*chunk_size); } + if (flag_corerep_factor) { - h_corerep = Kokkos::create_mirror_view(d_corerep); - Kokkos::deep_copy(h_corerep,d_corerep); - memcpy(corerep_factor+chunk_offset, (void *) h_corerep.data(), sizeof(double)*chunk_size); + h_corerep = Kokkos::create_mirror_view(d_corerep); + Kokkos::deep_copy(h_corerep,d_corerep); + memcpy(corerep_factor+chunk_offset, (void *) h_corerep.data(), sizeof(double)*chunk_size); } chunk_offset += chunk_size; @@ -909,18 +897,17 @@ void PairPACEExtrapolationKokkos::operator() (TagPairPACEComputeNeig Kokkos::MinLoc reducer_scalar(djjmin); // loop over ncount (actual neighbours withing cutoff) rather than jnum (total number of neigh in cutoff+skin) Kokkos::parallel_reduce(Kokkos::TeamThreadRange(team, ncount), - [&](const int offset, minloc_value_type &min_d_dist) { - int j = d_nearest(ii,offset); - j &= NEIGHMASK; - const int jtype = type(j); - auto r = d_rnorms(ii,offset); - const int mu_j = d_map(type(j)); - const F_FLOAT d = r - (d_cut_in(mu_i, mu_j) - d_dcut_in(mu_i, mu_j)); - if (d < min_d_dist.val) { - min_d_dist.val = d; - min_d_dist.loc = offset; - } - }, reducer_scalar); + [&](const int offset, minloc_value_type &min_d_dist) { + int j = d_nearest(ii,offset); + j &= NEIGHMASK; + auto r = d_rnorms(ii,offset); + const int mu_j = d_map(type(j)); + const F_FLOAT d = r - (d_cut_in(mu_i, mu_j) - d_dcut_in(mu_i, mu_j)); + if (d < min_d_dist.val) { + min_d_dist.val = d; + min_d_dist.loc = offset; + } + }, reducer_scalar); d_d_min(ii) = djjmin.val; d_jj_min(ii) = djjmin.loc;// d_jj_min should be NOT in 0..jnum range, but in 0..d_ncount(<=jnum) } else { @@ -956,28 +943,6 @@ void PairPACEExtrapolationKokkos::operator() (TagPairPACEComputeRadi /* ---------------------------------------------------------------------- */ -template -KOKKOS_INLINE_FUNCTION -void 
PairPACEExtrapolationKokkos::operator() (TagPairPACEComputeYlm, const typename Kokkos::TeamPolicy::member_type& team) const -{ - // Extract the atom number - int ii = team.team_rank() + team.team_size() * (team.league_rank() % - ((chunk_size+team.team_size()-1)/team.team_size())); - if (ii >= chunk_size) return; - - // Extract the neighbor number - const int jj = team.league_rank() / ((chunk_size+team.team_size()-1)/team.team_size()); - const int ncount = d_ncount(ii); - if (jj >= ncount) return; - - const double xn = d_rhats(ii, jj, 0); - const double yn = d_rhats(ii, jj, 1); - const double zn = d_rhats(ii, jj, 2); - compute_ylm(ii,jj,xn,yn,zn,lmax); -} - -/* ---------------------------------------------------------------------- */ - template KOKKOS_INLINE_FUNCTION void PairPACEExtrapolationKokkos::operator() (TagPairPACEComputeAi, const typename Kokkos::TeamPolicy::member_type& team) const @@ -999,13 +964,127 @@ void PairPACEExtrapolationKokkos::operator() (TagPairPACEComputeAi, Kokkos::atomic_add(&A_rank1(ii, mu_j, n), gr(ii, jj, n) * Y00); // rank > 1 - for (int n = 0; n < nradmax; n++) { - for (int l = 0; l <= lmax; l++) { - for (int m = 0; m <= l; m++) { - const int idx = l * (l + 1) + m; // (l, m) - Kokkos::atomic_add(&A(ii, mu_j, n, idx).re, fr(ii, jj, n, l) * ylm(ii, jj, idx).re); - Kokkos::atomic_add(&A(ii, mu_j, n, idx).im, fr(ii, jj, n, l) * ylm(ii, jj, idx).im); + + // Compute plm and ylm + + // requires rx^2 + ry^2 + rz^2 = 1 , NO CHECKING IS PERFORMED !!!!!!!!! + // requires -1 <= rz <= 1 , NO CHECKING IS PERFORMED !!!!!!!!! 
+ // prefactors include 1/sqrt(2) factor compared to reference + + complex ylm, phase; + complex phasem, mphasem1; + complex dyx, dyy, dyz; + complex rdy; + + const double rx = d_rhats(ii, jj, 0); + const double ry = d_rhats(ii, jj, 1); + const double rz = d_rhats(ii, jj, 2); + + phase.re = rx; + phase.im = ry; + + double plm_idx,plm_idx1,plm_idx2; + + plm_idx = plm_idx1 = plm_idx2 = 0.0; + + int idx_sph = 0; + + // m = 0 + for (int l = 0; l <= lmax; l++) { + // const int idx = l * (l + 1); + + if (l == 0) { + // l=0, m=0 + // plm[0] = Y00/sq1o4pi; //= sq1o4pi; + plm_idx = Y00; //= 1; + } else if (l == 1) { + // l=1, m=0 + plm_idx = Y00 * sq3 * rz; + } else { + // l>=2, m=0 + plm_idx = alm(idx_sph) * (rz * plm_idx1 + blm(idx_sph) * plm_idx2); + } + + ylm.re = plm_idx; + ylm.im = 0.0; + + for (int n = 0; n < nradmax; n++) { + Kokkos::atomic_add(&A_sph(ii, mu_j, idx_sph, n).re, fr(ii, jj, l, n) * ylm.re); + Kokkos::atomic_add(&A_sph(ii, mu_j, idx_sph, n).im, fr(ii, jj, l, n) * ylm.im); + } + + plm_idx2 = plm_idx1; + plm_idx1 = plm_idx; + + idx_sph++; + } + + plm_idx = plm_idx1 = plm_idx2 = 0.0; + + // m = 1 + for (int l = 1; l <= lmax; l++) { + // const int idx = l * (l + 1) + 1; // (l, 1) + + if (l == 1) { + // l=1, m=1 + plm_idx = -sq3o2 * Y00; + } else if (l == 2) { + const double t = dl(l) * plm_idx1; + plm_idx = t * rz; + } else { + plm_idx = alm(idx_sph) * (rz * plm_idx1 + blm(idx_sph) * plm_idx2); + } + + ylm = phase * plm_idx; + + for (int n = 0; n < nradmax; n++) { + Kokkos::atomic_add(&A_sph(ii, mu_j, idx_sph, n).re, fr(ii, jj, l, n) * ylm.re); + Kokkos::atomic_add(&A_sph(ii, mu_j, idx_sph, n).im, fr(ii, jj, l, n) * ylm.im); + } + + plm_idx2 = plm_idx1; + plm_idx1 = plm_idx; + + idx_sph++; + } + + plm_idx = plm_idx1 = plm_idx2 = 0.0; + + double plm_mm1_mm1 = -sq3o2 * Y00; // (1, 1) + + // m > 1 + phasem = phase; + for (int m = 2; m <= lmax; m++) { + + mphasem1.re = phasem.re * double(m); + mphasem1.im = phasem.im * double(m); + phasem = phasem * phase; + + 
for (int l = m; l <= lmax; l++) { + // const int idx = l * (l + 1) + m; + + if (l == m) { + plm_idx = cl(l) * plm_mm1_mm1; // (m+1, m) + plm_mm1_mm1 = plm_idx; + } else if (l == (m + 1)) { + const double t = dl(l) * plm_mm1_mm1; // (m - 1, m - 1) + plm_idx = t * rz; // (m, m) + } else { + plm_idx = alm(idx_sph) * (rz * plm_idx1 + blm(idx_sph) * plm_idx2); } + + ylm.re = phasem.re * plm_idx; + ylm.im = phasem.im * plm_idx; + + for (int n = 0; n < nradmax; n++) { + Kokkos::atomic_add(&A_sph(ii, mu_j, idx_sph, n).re, fr(ii, jj, l, n) * ylm.re); + Kokkos::atomic_add(&A_sph(ii, mu_j, idx_sph, n).im, fr(ii, jj, l, n) * ylm.im); + } + + plm_idx2 = plm_idx1; + plm_idx1 = plm_idx; + + idx_sph++; } } @@ -1019,17 +1098,35 @@ template KOKKOS_INLINE_FUNCTION void PairPACEExtrapolationKokkos::operator() (TagPairPACEConjugateAi, const int& ii) const { - //complex conjugate A's (for NEGATIVE (-m) terms) - // for rank > 1 for (int mu_j = 0; mu_j < nelements; mu_j++) { - for (int n = 0; n < nradmax; n++) { - for (int l = 0; l <= lmax; l++) { + + // transpose + + int idx_sph = 0; + + for (int m = 0; m <= lmax; m++) { + for (int l = m; l <= lmax; l++) { + const int idx = l * (l + 1) + m; + for (int n = 0; n < nradmax; n++) { + A(ii, mu_j, idx, n) = A_sph(ii, mu_j, idx_sph, n); + } + + idx_sph++; + } + } + + // complex conjugate A's (for NEGATIVE (-m) terms) + // for rank > 1 + + for (int l = 0; l <= lmax; l++) { //fill in -m part in the outer loop using the same m <-> -m symmetry as for Ylm - for (int m = 1; m <= l; m++) { - const int idx = l * (l + 1) + m; // (l, m) - const int idxm = l * (l + 1) - m; // (l, -m) - const int factor = m % 2 == 0 ? 1 : -1; - A(ii, mu_j, n, idxm) = A(ii, mu_j, n, idx).conj() * (double)factor; + for (int m = 1; m <= l; m++) { + const int idx = l * (l + 1) + m; // (l, m) + const int idxm = l * (l + 1) - m; // (l, -m) + const int idx_sph = d_idx_sph(idx); + const int factor = m % 2 == 0 ? 
1 : -1; + for (int n = 0; n < nradmax; n++) { + A(ii, mu_j, idxm, n) = A_sph(ii, mu_j, idx_sph, n).conj() * (double)factor; } } } @@ -1042,73 +1139,72 @@ template KOKKOS_INLINE_FUNCTION void PairPACEExtrapolationKokkos::operator() (TagPairPACEComputeRho, const int& iter) const { - const int idx_ms_comb = iter / chunk_size; + const int idx_ms_combs = iter / chunk_size; const int ii = iter % chunk_size; const int i = d_ilist[ii + chunk_offset]; const int mu_i = d_map(type(i)); - if (idx_ms_comb >= d_idx_ms_combs_count(mu_i)) return; + if (idx_ms_combs >= d_idx_ms_combs_count(mu_i)) return; const int ndensity = d_ndensity(mu_i); - const int func_ind = d_func_inds(mu_i, idx_ms_comb); - const int rank = d_rank(mu_i, func_ind); + const int idx_func = d_idx_funcs(mu_i, idx_ms_combs); + const int rank = d_rank(mu_i, idx_func); const int r = rank - 1; // Basis functions B with iterative product and density rho(p) calculation if (rank == 1) { - const int mu = d_mus(mu_i, func_ind, 0); - const int n = d_ns(mu_i, func_ind, 0); + const int mu = d_mus(mu_i, idx_func, 0); + const int n = d_ns(mu_i, idx_func, 0); double A_cur = A_rank1(ii, mu, n - 1); for (int p = 0; p < ndensity; ++p) { //for rank=1 (r=0) only 1 ms-combination exists (ms_ind=0), so index of func.ctildes is 0..ndensity-1 - Kokkos::atomic_add(&rhos(ii, p), d_coeffs(mu_i, func_ind, p) * d_gen_cgs(mu_i, idx_ms_comb) * A_cur); + Kokkos::atomic_add(&rhos(ii, p), d_coeffs(mu_i, idx_func, p) * d_gen_cgs(mu_i, idx_ms_combs) * A_cur); } - - //gamma_i + // gamma_i if (flag_compute_extrapolation_grade) - Kokkos::atomic_add(&projections(ii, func_ind), d_gen_cgs(mu_i, idx_ms_comb) * A_cur); + Kokkos::atomic_add(&projections(ii, idx_func), d_gen_cgs(mu_i, idx_ms_combs) * A_cur); } else { // rank > 1 // loop over {ms} combinations in sum // loop over m, collect B = product of A with given ms - A_forward_prod(ii, idx_ms_comb, 0) = complex::one(); + A_forward_prod(ii, idx_ms_combs, 0) = complex::one(); // fill forward A-product 
triangle for (int t = 0; t < rank; t++) { //TODO: optimize ns[t]-1 -> ns[t] during functions construction - const int mu = d_mus(mu_i, func_ind, t); - const int n = d_ns(mu_i, func_ind, t); - const int l = d_ls(mu_i, func_ind, t); - const int m = d_ms_combs(mu_i, idx_ms_comb, t); // current ms-combination (of length = rank) + const int mu = d_mus(mu_i, idx_func, t); + const int n = d_ns(mu_i, idx_func, t); + const int l = d_ls(mu_i, idx_func, t); + const int m = d_ms_combs(mu_i, idx_ms_combs, t); // current ms-combination (of length = rank) const int idx = l * (l + 1) + m; // (l, m) - A_list(ii, idx_ms_comb, t) = A(ii, mu, n - 1, idx); - A_forward_prod(ii, idx_ms_comb, t + 1) = A_forward_prod(ii, idx_ms_comb, t) * A_list(ii, idx_ms_comb, t); + A_list(ii, idx_ms_combs, t) = A(ii, mu, idx, n - 1); + A_forward_prod(ii, idx_ms_combs, t + 1) = A_forward_prod(ii, idx_ms_combs, t) * A_list(ii, idx_ms_combs, t); } complex A_backward_prod = complex::one(); // fill backward A-product triangle for (int t = r; t >= 1; t--) { - const complex dB = A_forward_prod(ii, idx_ms_comb, t) * A_backward_prod; // dB - product of all A's except t-th - dB_flatten(ii, idx_ms_comb, t) = dB; + const complex dB = A_forward_prod(ii, idx_ms_combs, t) * A_backward_prod; // dB - product of all A's except t-th + dB_flatten(ii, idx_ms_combs, t) = dB; - A_backward_prod = A_backward_prod * A_list(ii, idx_ms_comb, t); + A_backward_prod = A_backward_prod * A_list(ii, idx_ms_combs, t); } - dB_flatten(ii, idx_ms_comb, 0) = A_forward_prod(ii, idx_ms_comb, 0) * A_backward_prod; + dB_flatten(ii, idx_ms_combs, 0) = A_forward_prod(ii, idx_ms_combs, 0) * A_backward_prod; - const complex B = A_forward_prod(ii, idx_ms_comb, rank); + const complex B = A_forward_prod(ii, idx_ms_combs, rank); for (int p = 0; p < ndensity; ++p) { // real-part only multiplication - Kokkos::atomic_add(&rhos(ii, p), B.real_part_product(d_coeffs(mu_i, func_ind, p) * d_gen_cgs(mu_i, idx_ms_comb))); + Kokkos::atomic_add(&rhos(ii, p), 
B.real_part_product(d_coeffs(mu_i, idx_func, p) * d_gen_cgs(mu_i, idx_ms_combs))); } - //gamma_i + // gamma_i if (flag_compute_extrapolation_grade) - Kokkos::atomic_add(&projections(ii, func_ind), B.real_part_product(d_gen_cgs(mu_i, idx_ms_comb))); + Kokkos::atomic_add(&projections(ii, idx_func), B.real_part_product(d_gen_cgs(mu_i, idx_ms_combs))); } } @@ -1129,7 +1225,6 @@ void PairPACEExtrapolationKokkos::operator() (TagPairPACEComputeFS, double evdwl_cut; evdwl = fcut = dfcut = 0.0; - inner_cutoff(rho_core(ii), rho_cut, drho_cut, fcut, dfcut); FS_values_and_derivatives(ii, evdwl, mu_i); if (is_zbl) { @@ -1155,7 +1250,6 @@ void PairPACEExtrapolationKokkos::operator() (TagPairPACEComputeFS, for (int p = 0; p < ndensity; ++p) dF_drho(ii, p) *= fcut; - // tally energy contribution if (eflag) { // E0 shift @@ -1201,52 +1295,58 @@ template KOKKOS_INLINE_FUNCTION void PairPACEExtrapolationKokkos::operator() (TagPairPACEComputeWeights, const int& iter) const { - const int idx_ms_comb = iter / chunk_size; + const int idx_ms_combs = iter / chunk_size; const int ii = iter % chunk_size; const int i = d_ilist[ii + chunk_offset]; const int mu_i = d_map(type(i)); - if (idx_ms_comb >= d_idx_ms_combs_count(mu_i)) return; + if (idx_ms_combs >= d_idx_ms_combs_count(mu_i)) return; const int ndensity = d_ndensity(mu_i); - const int func_ind = d_func_inds(mu_i, idx_ms_comb); - const int rank = d_rank(mu_i, func_ind); + const int idx_func = d_idx_funcs(mu_i, idx_ms_combs); + const int rank = d_rank(mu_i, idx_func); // Weights and theta calculation if (rank == 1) { - const int mu = d_mus(mu_i, func_ind, 0); - const int n = d_ns(mu_i, func_ind, 0); + const int mu = d_mus(mu_i, idx_func, 0); + const int n = d_ns(mu_i, idx_func, 0); double theta = 0.0; for (int p = 0; p < ndensity; ++p) { // for rank=1 (r=0) only 1 ms-combination exists (ms_ind=0), so index of func.ctildes is 0..ndensity-1 - theta += dF_drho(ii, p) * d_coeffs(mu_i, func_ind, p) * d_gen_cgs(mu_i, idx_ms_comb); + theta += 
dF_drho(ii, p) * d_coeffs(mu_i, idx_func, p) * d_gen_cgs(mu_i, idx_ms_combs); } Kokkos::atomic_add(&weights_rank1(ii, mu, n - 1), theta); } else { // rank > 1 double theta = 0.0; for (int p = 0; p < ndensity; ++p) - theta += dF_drho(ii, p) * d_coeffs(mu_i, func_ind, p) * d_gen_cgs(mu_i, idx_ms_comb); + theta += dF_drho(ii, p) * d_coeffs(mu_i, idx_func, p) * d_gen_cgs(mu_i, idx_ms_combs); theta *= 0.5; // 0.5 factor due to possible double counting ??? for (int t = 0; t < rank; ++t) { - const int m_t = d_ms_combs(mu_i, idx_ms_comb, t); + const int m_t = d_ms_combs(mu_i, idx_ms_combs, t); const int factor = (m_t % 2 == 0 ? 1 : -1); - const complex dB = dB_flatten(ii, idx_ms_comb, t); - const int mu_t = d_mus(mu_i, func_ind, t); - const int n_t = d_ns(mu_i, func_ind, t); - const int l_t = d_ls(mu_i, func_ind, t); + const complex dB = dB_flatten(ii, idx_ms_combs, t); + const int mu_t = d_mus(mu_i, idx_func, t); + const int n_t = d_ns(mu_i, idx_func, t); + const int l_t = d_ls(mu_i, idx_func, t); const int idx = l_t * (l_t + 1) + m_t; // (l, m) - const complex value = theta * dB; - Kokkos::atomic_add(&(weights(ii, mu_t, n_t - 1, idx).re), value.re); - Kokkos::atomic_add(&(weights(ii, mu_t, n_t - 1, idx).im), value.im); + const int idx_sph = d_idx_sph(idx); + if (idx_sph >= 0) { + const complex value = theta * dB; + Kokkos::atomic_add(&(weights(ii, mu_t, idx_sph, n_t - 1).re), value.re); + Kokkos::atomic_add(&(weights(ii, mu_t, idx_sph, n_t - 1).im), value.im); + } // update -m_t (that could also be positive), because the basis is half_basis const int idxm = l_t * (l_t + 1) - m_t; // (l, -m) - const complex valuem = theta * dB.conj() * (double)factor; - Kokkos::atomic_add(&(weights(ii, mu_t, n_t - 1, idxm).re), valuem.re); - Kokkos::atomic_add(&(weights(ii, mu_t, n_t - 1, idxm).im), valuem.im); + const int idxm_sph = d_idx_sph(idxm); + if (idxm_sph >= 0) { + const complex valuem = theta * dB.conj() * (double)factor; + Kokkos::atomic_add(&(weights(ii, mu_t, idxm_sph, n_t - 
1).re), valuem.re); + Kokkos::atomic_add(&(weights(ii, mu_t, idxm_sph, n_t - 1).im), valuem.im); + } } } } @@ -1293,37 +1393,239 @@ void PairPACEExtrapolationKokkos::operator() (TagPairPACEComputeDeri } // for rank > 1 - for (int n = 0; n < nradmax; n++) { - for (int l = 0; l <= lmax; l++) { - const double R_over_r = fr(ii, jj, n, l) * rinv; - const double DR = dfr(ii, jj, n, l); - // for m >= 0 - for (int m = 0; m <= l; m++) { - const int idx = l * (l + 1) + m; // (l, m) - complex w = weights(ii, mu_j, n, idx); + // compute plm, dplm, ylm and dylm + // requires rx^2 + ry^2 + rz^2 = 1 , NO CHECKING IS PERFORMED !!!!!!!!! + // requires -1 <= rz <= 1 , NO CHECKING IS PERFORMED !!!!!!!!! + // prefactors include 1/sqrt(2) factor compared to reference + + complex ylm,dylm[3]; + complex phase; + complex phasem, mphasem1; + complex dyx, dyy, dyz; + complex rdy; + + const double rx = d_rhats(ii, jj, 0); + const double ry = d_rhats(ii, jj, 1); + const double rz = d_rhats(ii, jj, 2); + + phase.re = rx; + phase.im = ry; + + double plm_idx,plm_idx1,plm_idx2; + double dplm_idx,dplm_idx1,dplm_idx2; + + plm_idx = plm_idx1 = plm_idx2 = 0.0; + dplm_idx = dplm_idx1 = dplm_idx2 = 0.0; + + int idx_sph = 0; + + // m = 0 + for (int l = 0; l <= lmax; l++) { + // const int idx = l * (l + 1); + + if (l == 0) { + // l=0, m=0 + // plm[0] = Y00/sq1o4pi; //= sq1o4pi; + plm_idx = Y00; //= 1; + dplm_idx = 0.0; + } else if (l == 1) { + // l=1, m=0 + plm_idx = Y00 * sq3 * rz; + dplm_idx = Y00 * sq3; + } else { + // l>=2, m=0 + plm_idx = alm(idx_sph) * (rz * plm_idx1 + blm(idx_sph) * plm_idx2); + dplm_idx = alm(idx_sph) * (plm_idx1 + rz * dplm_idx1 + blm(idx_sph) * dplm_idx2); + } + + ylm.re = plm_idx; + ylm.im = 0.0; + + dyz.re = dplm_idx; + rdy.re = dyz.re * rz; + + dylm[0].re = -rdy.re * rx; + dylm[0].im = 0.0; + dylm[1].re = -rdy.re * ry; + dylm[1].im = 0.0; + dylm[2].re = dyz.re - rdy.re * rz; + dylm[2].im = 0; + + for (int n = 0; n < nradmax; n++) { + + const double R_over_r = fr(ii, jj, l, 
n) * rinv; + const double DR = dfr(ii, jj, l, n); + const complex Y_DR = ylm * DR; + + complex w = weights(ii, mu_j, idx_sph, n); + if (w.re == 0.0 && w.im == 0.0) continue; + + complex grad_phi_nlm[3]; + grad_phi_nlm[0] = Y_DR * r_hat[0] + dylm[0] * R_over_r; + grad_phi_nlm[1] = Y_DR * r_hat[1] + dylm[1] * R_over_r; + grad_phi_nlm[2] = Y_DR * r_hat[2] + dylm[2] * R_over_r; + // real-part multiplication only + f_ji[0] += w.real_part_product(grad_phi_nlm[0]); + f_ji[1] += w.real_part_product(grad_phi_nlm[1]); + f_ji[2] += w.real_part_product(grad_phi_nlm[2]); + } + + plm_idx2 = plm_idx1; + dplm_idx2 = dplm_idx1; + + plm_idx1 = plm_idx; + dplm_idx1 = dplm_idx; + + idx_sph++; + } + + plm_idx = plm_idx1 = plm_idx2 = 0.0; + dplm_idx = dplm_idx1 = dplm_idx2 = 0.0; + + // m = 1 + for (int l = 1; l <= lmax; l++) { + // const int idx = l * (l + 1) + 1; // (l, 1) + + if (l == 1) { + // l=1, m=1 + plm_idx = -sq3o2 * Y00; + dplm_idx = 0.0; + } else if (l == 2) { + const double t = dl(l) * plm_idx1; + plm_idx = t * rz; + dplm_idx = t; + } else { + plm_idx = alm(idx_sph) * (rz * plm_idx1 + blm(idx_sph) * plm_idx2); + dplm_idx = alm(idx_sph) * (plm_idx1 + rz * dplm_idx1 + blm(idx_sph) * dplm_idx2); + } + + ylm = phase * plm_idx; + + dyx.re = plm_idx; + dyx.im = 0.0; + dyy.re = 0.0; + dyy.im = plm_idx; + dyz.re = phase.re * dplm_idx; + dyz.im = phase.im * dplm_idx; + + rdy.re = rx * dyx.re + +rz * dyz.re; + rdy.im = ry * dyy.im + rz * dyz.im; + + dylm[0].re = dyx.re - rdy.re * rx; + dylm[0].im = -rdy.im * rx; + dylm[1].re = -rdy.re * ry; + dylm[1].im = dyy.im - rdy.im * ry; + dylm[2].re = dyz.re - rdy.re * rz; + dylm[2].im = dyz.im - rdy.im * rz; + + for (int n = 0; n < nradmax; n++) { + + const double R_over_r = fr(ii, jj, l, n) * rinv; + const double DR = dfr(ii, jj, l, n); + const complex Y_DR = ylm * DR; + + complex w = weights(ii, mu_j, idx_sph, n); + if (w.re == 0.0 && w.im == 0.0) continue; + // counting for -m cases if m > 0 + w.re *= 2.0; + w.im *= 2.0; + + complex 
grad_phi_nlm[3]; + grad_phi_nlm[0] = Y_DR * r_hat[0] + dylm[0] * R_over_r; + grad_phi_nlm[1] = Y_DR * r_hat[1] + dylm[1] * R_over_r; + grad_phi_nlm[2] = Y_DR * r_hat[2] + dylm[2] * R_over_r; + // real-part multiplication only + f_ji[0] += w.real_part_product(grad_phi_nlm[0]); + f_ji[1] += w.real_part_product(grad_phi_nlm[1]); + f_ji[2] += w.real_part_product(grad_phi_nlm[2]); + } + + plm_idx2 = plm_idx1; + dplm_idx2 = dplm_idx1; + + plm_idx1 = plm_idx; + dplm_idx1 = dplm_idx; + + idx_sph++; + } + + plm_idx = plm_idx1 = plm_idx2 = 0.0; + dplm_idx = dplm_idx1 = dplm_idx2 = 0.0; + + double plm_mm1_mm1 = -sq3o2 * Y00; // (1, 1) + + // m > 1 + phasem = phase; + for (int m = 2; m <= lmax; m++) { + + mphasem1.re = phasem.re * double(m); + mphasem1.im = phasem.im * double(m); + phasem = phasem * phase; + + for (int l = m; l <= lmax; l++) { + // const int idx = l * (l + 1) + m; + + if (l == m) { + plm_idx = cl(l) * plm_mm1_mm1; // (m+1, m) + dplm_idx = 0.0; + plm_mm1_mm1 = plm_idx; + } else if (l == (m + 1)) { + const double t = dl(l) * plm_mm1_mm1; // (m - 1, m - 1) + plm_idx = t * rz; // (m, m) + dplm_idx = t; + } else { + plm_idx = alm(idx_sph) * (rz * plm_idx1 + blm(idx_sph) * plm_idx2); + dplm_idx = alm(idx_sph) * (plm_idx1 + rz * dplm_idx1 + blm(idx_sph) * dplm_idx2); + } + + ylm.re = phasem.re * plm_idx; + ylm.im = phasem.im * plm_idx; + + dyx = mphasem1 * plm_idx; + dyy.re = -dyx.im; + dyy.im = dyx.re; + dyz = phasem * dplm_idx; + + rdy.re = rx * dyx.re + ry * dyy.re + rz * dyz.re; + rdy.im = rx * dyx.im + ry * dyy.im + rz * dyz.im; + + dylm[0].re = dyx.re - rdy.re * rx; + dylm[0].im = dyx.im - rdy.im * rx; + dylm[1].re = dyy.re - rdy.re * ry; + dylm[1].im = dyy.im - rdy.im * ry; + dylm[2].re = dyz.re - rdy.re * rz; + dylm[2].im = dyz.im - rdy.im * rz; + + for (int n = 0; n < nradmax; n++) { + + const double R_over_r = fr(ii, jj, l, n) * rinv; + const double DR = dfr(ii, jj, l, n); + const complex Y_DR = ylm * DR; + + complex w = weights(ii, mu_j, idx_sph, n); if 
(w.re == 0.0 && w.im == 0.0) continue; // counting for -m cases if m > 0 - if (m > 0) { - w.re *= 2.0; - w.im *= 2.0; - } - - complex DY[3]; - DY[0] = dylm(ii, jj, idx, 0); - DY[1] = dylm(ii, jj, idx, 1); - DY[2] = dylm(ii, jj, idx, 2); - const complex Y_DR = ylm(ii, jj, idx) * DR; + w.re *= 2.0; + w.im *= 2.0; complex grad_phi_nlm[3]; - grad_phi_nlm[0] = Y_DR * r_hat[0] + DY[0] * R_over_r; - grad_phi_nlm[1] = Y_DR * r_hat[1] + DY[1] * R_over_r; - grad_phi_nlm[2] = Y_DR * r_hat[2] + DY[2] * R_over_r; + grad_phi_nlm[0] = Y_DR * r_hat[0] + dylm[0] * R_over_r; + grad_phi_nlm[1] = Y_DR * r_hat[1] + dylm[1] * R_over_r; + grad_phi_nlm[2] = Y_DR * r_hat[2] + dylm[2] * R_over_r; // real-part multiplication only f_ji[0] += w.real_part_product(grad_phi_nlm[0]); f_ji[1] += w.real_part_product(grad_phi_nlm[1]); f_ji[2] += w.real_part_product(grad_phi_nlm[2]); } + + plm_idx2 = plm_idx1; + dplm_idx2 = dplm_idx1; + + plm_idx1 = plm_idx; + dplm_idx1 = dplm_idx; + + idx_sph++; } } @@ -1461,31 +1763,46 @@ void PairPACEExtrapolationKokkos::v_tally_xyz(EV_FLOAT &ev, const in template void PairPACEExtrapolationKokkos::pre_compute_harmonics(int lmax) { + auto h_idx_sph = Kokkos::create_mirror_view(d_idx_sph); auto h_alm = Kokkos::create_mirror_view(alm); auto h_blm = Kokkos::create_mirror_view(blm); auto h_cl = Kokkos::create_mirror_view(cl); auto h_dl = Kokkos::create_mirror_view(dl); - for (int l = 1; l <= lmax; l++) { - const double lsq = l * l; - const double ld = 2 * l; - const double l1 = (4 * lsq - 1); - const double l2 = lsq - ld + 1; - for (int m = 0; m < l - 1; m++) { - const double msq = m * m; - const double a = sqrt((double(l1)) / (double(lsq - msq))); - const double b = -sqrt((double(l2 - msq)) / (double(4 * l2 - 1))); + Kokkos::deep_copy(h_idx_sph,-1); + + int idx_sph = 0; + for (int m = 0; m <= lmax; m++) { + const double msq = m * m; + for (int l = m; l <= lmax; l++) { const int idx = l * (l + 1) + m; // (l, m) - h_alm(idx) = a; - h_blm(idx) = b; + h_idx_sph(idx) = 
idx_sph; + + double a = 0.0; + double b = 0.0; + + if (l > 1 && l != m) { + const double lsq = l * l; + const double ld = 2 * l; + const double l1 = (4 * lsq - 1); + const double l2 = lsq - ld + 1; + + a = sqrt((double(l1)) / (double(lsq - msq))); + b = -sqrt((double(l2 - msq)) / (double(4 * l2 - 1))); + } + h_alm(idx_sph) = a; + h_blm(idx_sph) = b; + idx_sph++; } } + idx_sph_max = idx_sph; for (int l = 1; l <= lmax; l++) { h_cl(l) = -sqrt(1.0 + 0.5 / (double(l))); h_dl(l) = sqrt(double(2 * (l - 1) + 3)); } + Kokkos::deep_copy(d_idx_sph, h_idx_sph); Kokkos::deep_copy(alm, h_alm); Kokkos::deep_copy(blm, h_blm); Kokkos::deep_copy(cl, h_cl); @@ -1494,143 +1811,6 @@ void PairPACEExtrapolationKokkos::pre_compute_harmonics(int lmax) /* ---------------------------------------------------------------------- */ -template -KOKKOS_INLINE_FUNCTION -void PairPACEExtrapolationKokkos::compute_barplm(int ii, int jj, double rz, int lmax) const -{ - // requires -1 <= rz <= 1 , NO CHECKING IS PERFORMED !!!!!!!!! 
- // prefactors include 1/sqrt(2) factor compared to reference - - // l=0, m=0 - // plm(ii, jj, 0, 0) = Y00/sq1o4pi; //= sq1o4pi; - plm(ii, jj, 0) = Y00; //= 1; - dplm(ii, jj, 0) = 0.0; - - if (lmax > 0) { - - // l=1, m=0 - plm(ii, jj, 2) = Y00 * sq3 * rz; - dplm(ii, jj, 2) = Y00 * sq3; - - // l=1, m=1 - plm(ii, jj, 3) = -sq3o2 * Y00; - dplm(ii, jj, 3) = 0.0; - - // loop l = 2, lmax - for (int l = 2; l <= lmax; l++) { - for (int m = 0; m < l - 1; m++) { - const int idx = l * (l + 1) + m; // (l, m) - const int idx1 = (l - 1) * l + m; // (l - 1, m) - const int idx2 = (l - 2) * (l - 1) + m; // (l - 2, m) - plm(ii, jj, idx) = alm(idx) * (rz * plm(ii, jj, idx1) + blm(idx) * plm(ii, jj, idx2)); - dplm(ii, jj, idx) = alm(idx) * (plm(ii, jj, idx1) + rz * dplm(ii, jj, idx1) + blm(idx) * dplm(ii, jj, idx2)); - } - const int idx = l * (l + 1) + l; // (l, l) - const int idx1 = l * (l + 1) + l - 1; // (l, l - 1) - const int idx2 = (l - 1) * l + l - 1; // (l - 1, l - 1) - const double t = dl(l) * plm(ii, jj, idx2); - plm(ii, jj, idx1) = t * rz; - dplm(ii, jj, idx1) = t; - plm(ii, jj, idx) = cl(l) * plm(ii, jj, idx2); - dplm(ii, jj, idx) = 0.0; - } - } -} - -/* ---------------------------------------------------------------------- */ - -template -KOKKOS_INLINE_FUNCTION -void PairPACEExtrapolationKokkos::compute_ylm(int ii, int jj, double rx, double ry, double rz, int lmax) const -{ - // requires rx^2 + ry^2 + rz^2 = 1 , NO CHECKING IS PERFORMED !!!!!!!!! 
- - complex phase; - complex phasem, mphasem1; - complex dyx, dyy, dyz; - complex rdy; - - phase.re = rx; - phase.im = ry; - - // compute barplm - compute_barplm(ii, jj, rz, lmax); - - // m = 0 - for (int l = 0; l <= lmax; l++) { - const int idx = l * (l + 1); - - ylm(ii, jj, idx).re = plm(ii, jj, idx); - ylm(ii, jj, idx).im = 0.0; - - dyz.re = dplm(ii, jj, idx); - rdy.re = dyz.re * rz; - - dylm(ii, jj, idx, 0).re = -rdy.re * rx; - dylm(ii, jj, idx, 0).im = 0.0; - dylm(ii, jj, idx, 1).re = -rdy.re * ry; - dylm(ii, jj, idx, 1).im = 0.0; - dylm(ii, jj, idx, 2).re = dyz.re - rdy.re * rz; - dylm(ii, jj, idx, 2).im = 0; - } - // m = 1 - for (int l = 1; l <= lmax; l++) { - const int idx = l * (l + 1) + 1; - - ylm(ii, jj, idx) = phase * plm(ii, jj, idx); - - dyx.re = plm(ii, jj, idx); - dyx.im = 0.0; - dyy.re = 0.0; - dyy.im = plm(ii, jj, idx); - dyz.re = phase.re * dplm(ii, jj, idx); - dyz.im = phase.im * dplm(ii, jj, idx); - - rdy.re = rx * dyx.re + +rz * dyz.re; - rdy.im = ry * dyy.im + rz * dyz.im; - - dylm(ii, jj, idx, 0).re = dyx.re - rdy.re * rx; - dylm(ii, jj, idx, 0).im = -rdy.im * rx; - dylm(ii, jj, idx, 1).re = -rdy.re * ry; - dylm(ii, jj, idx, 1).im = dyy.im - rdy.im * ry; - dylm(ii, jj, idx, 2).re = dyz.re - rdy.re * rz; - dylm(ii, jj, idx, 2).im = dyz.im - rdy.im * rz; - } - - // m > 1 - phasem = phase; - for (int m = 2; m <= lmax; m++) { - - mphasem1.re = phasem.re * double(m); - mphasem1.im = phasem.im * double(m); - phasem = phasem * phase; - - for (int l = m; l <= lmax; l++) { - const int idx = l * (l + 1) + m; - - ylm(ii, jj, idx).re = phasem.re * plm(ii, jj, idx); - ylm(ii, jj, idx).im = phasem.im * plm(ii, jj, idx); - - dyx = mphasem1 * plm(ii, jj, idx); - dyy.re = -dyx.im; - dyy.im = dyx.re; - dyz = phasem * dplm(ii, jj, idx); - - rdy.re = rx * dyx.re + ry * dyy.re + rz * dyz.re; - rdy.im = rx * dyx.im + ry * dyy.im + rz * dyz.im; - - dylm(ii, jj, idx, 0).re = dyx.re - rdy.re * rx; - dylm(ii, jj, idx, 0).im = dyx.im - rdy.im * rx; - dylm(ii, jj, idx, 
1).re = dyy.re - rdy.re * ry; - dylm(ii, jj, idx, 1).im = dyy.im - rdy.im * ry; - dylm(ii, jj, idx, 2).re = dyz.re - rdy.re * rz; - dylm(ii, jj, idx, 2).im = dyz.im - rdy.im * rz; - } - } -} - -/* ---------------------------------------------------------------------- */ - template KOKKOS_INLINE_FUNCTION void PairPACEExtrapolationKokkos::cutoff_func_poly(const double r, const double r_in, const double delta_in, double &fc, double &dfc) const @@ -1759,11 +1939,11 @@ void PairPACEExtrapolationKokkos::evaluate_splines(const int ii, con spline_gk.calcSplines(ii, jj, r, gr, dgr); spline_rnl.calcSplines(ii, jj, r, d_values, d_derivatives); - for (int kk = 0; kk < (int)fr.extent(2); kk++) { - for (int ll = 0; ll < (int)fr.extent(3); ll++) { - const int flatten = kk*fr.extent(3) + ll; - fr(ii, jj, kk, ll) = d_values(ii, jj, flatten); - dfr(ii, jj, kk, ll) = d_derivatives(ii, jj, flatten); + for (int ll = 0; ll < (int)fr.extent(2); ll++) { + for (int kk = 0; kk < (int)fr.extent(3); kk++) { + const int flatten = kk*fr.extent(2) + ll; + fr(ii, jj, ll, kk) = d_values(ii, jj, flatten); + dfr(ii, jj, ll, kk) = d_derivatives(ii, jj, flatten); } } @@ -1783,7 +1963,7 @@ void PairPACEExtrapolationKokkos::SplineInterpolatorKokkos::operator rscalelookup = spline.rscalelookup; num_of_functions = spline.num_of_functions; - lookupTable = t_ace_3d4("lookupTable", ntot+1, num_of_functions); + lookupTable = t_ace_3d4_lr("lookupTable", ntot+1, num_of_functions); auto h_lookupTable = Kokkos::create_mirror_view(lookupTable); for (int i = 0; i < ntot+1; i++) for (int j = 0; j < num_of_functions; j++) @@ -1889,10 +2069,6 @@ double PairPACEExtrapolationKokkos::memory_usage() bytes += MemKK::memory_usage(d_derivatives); bytes += MemKK::memory_usage(cr); bytes += MemKK::memory_usage(dcr); - bytes += MemKK::memory_usage(plm); - bytes += MemKK::memory_usage(dplm); - bytes += MemKK::memory_usage(ylm); - bytes += MemKK::memory_usage(dylm); bytes += MemKK::memory_usage(d_ncount); bytes += 
MemKK::memory_usage(d_mu); bytes += MemKK::memory_usage(d_rhats); @@ -1911,7 +2087,7 @@ double PairPACEExtrapolationKokkos::memory_usage() bytes += MemKK::memory_usage(d_idx_ms_combs_count); bytes += MemKK::memory_usage(d_rank); bytes += MemKK::memory_usage(d_num_ms_combs); - bytes += MemKK::memory_usage(d_func_inds); + bytes += MemKK::memory_usage(d_idx_funcs); bytes += MemKK::memory_usage(d_mus); bytes += MemKK::memory_usage(d_ns); bytes += MemKK::memory_usage(d_ls); @@ -1940,47 +2116,6 @@ double PairPACEExtrapolationKokkos::memory_usage() return bytes; } -/* ---------------------------------------------------------------------- - extract method for extracting value of scale variable - ---------------------------------------------------------------------- */ - -template -void *PairPACEExtrapolationKokkos::extract(const char *str, int &dim) -{ - dim = 0; - //check if str=="flag_compute_extrapolation_grade" then compute extrapolation grades on this iteration - if (strcmp(str, "gamma_flag") == 0) return (void *) &flag_compute_extrapolation_grade; - if (strcmp(str, "corerep_flag") == 0) return (void *) &flag_corerep_factor; - - dim = 2; - if (strcmp(str, "scale") == 0) return (void *) scale; - return nullptr; -} - -/* ---------------------------------------------------------------------- - peratom requests from FixPair - return ptr to requested data - also return ncol = # of quantites per atom - 0 = per-atom vector - 1 or more = # of columns in per-atom array - return NULL if str is not recognized ----------------------------------------------------------------------- */ - -template -void *PairPACEExtrapolationKokkos::extract_peratom(const char *str, int &ncol) -{ - if (strcmp(str, "gamma") == 0) { - ncol = 0; - return (void *) extrapolation_grade_gamma; - } - if (strcmp(str, "corerep") == 0) { - ncol = 0; - return (void *) corerep_factor; - } - - return nullptr; -} - /* ---------------------------------------------------------------------- */ namespace LAMMPS_NS { 
@@ -1989,4 +2124,3 @@ template class PairPACEExtrapolationKokkos; template class PairPACEExtrapolationKokkos; #endif } - diff --git a/src/KOKKOS/pair_pace_extrapolation_kokkos.h b/src/KOKKOS/pair_pace_extrapolation_kokkos.h index aa6c49c36d..df8a0c1740 100644 --- a/src/KOKKOS/pair_pace_extrapolation_kokkos.h +++ b/src/KOKKOS/pair_pace_extrapolation_kokkos.h @@ -36,7 +36,6 @@ class PairPACEExtrapolationKokkos : public PairPACEExtrapolation { public: struct TagPairPACEComputeNeigh{}; struct TagPairPACEComputeRadial{}; - struct TagPairPACEComputeYlm{}; struct TagPairPACEComputeAi{}; struct TagPairPACEConjugateAi{}; struct TagPairPACEComputeRho{}; @@ -67,9 +66,6 @@ class PairPACEExtrapolationKokkos : public PairPACEExtrapolation { KOKKOS_INLINE_FUNCTION void operator() (TagPairPACEComputeRadial,const typename Kokkos::TeamPolicy::member_type& team) const; - KOKKOS_INLINE_FUNCTION - void operator() (TagPairPACEComputeYlm,const typename Kokkos::TeamPolicy::member_type& team) const; - KOKKOS_INLINE_FUNCTION void operator() (TagPairPACEComputeAi,const typename Kokkos::TeamPolicy::member_type& team) const; @@ -99,12 +95,8 @@ class PairPACEExtrapolationKokkos : public PairPACEExtrapolation { KOKKOS_INLINE_FUNCTION void operator() (TagPairPACEComputeForce,const int& ii, EV_FLOAT&) const; - - void *extract(const char *str, int &dim) override; - void *extract_peratom(const char *str, int &ncol) override; - protected: - int inum, maxneigh, chunk_size, chunk_offset, idx_ms_combs_max, total_num_functions_max; + int inum, maxneigh, chunk_size, chunk_offset, idx_ms_combs_max, total_num_functions_max, idx_sph_max; int host_flag; int eflag, vflag; @@ -165,12 +157,6 @@ class PairPACEExtrapolationKokkos : public PairPACEExtrapolation { const F_FLOAT &fx, const F_FLOAT &fy, const F_FLOAT &fz, const F_FLOAT &delx, const F_FLOAT &dely, const F_FLOAT &delz) const; - KOKKOS_INLINE_FUNCTION - void compute_barplm(int, int, double, int) const; - - KOKKOS_INLINE_FUNCTION - void compute_ylm(int, 
int, double, double, double, int) const; - KOKKOS_INLINE_FUNCTION void cutoff_func_poly(const double, const double, const double, double &, double &) const; @@ -202,15 +188,19 @@ class PairPACEExtrapolationKokkos : public PairPACEExtrapolation { typedef Kokkos::View t_ace_1i; typedef Kokkos::View t_ace_2i; + typedef Kokkos::View t_ace_2i_lr; typedef Kokkos::View t_ace_3i; + typedef Kokkos::View t_ace_3i_lr; typedef Kokkos::View t_ace_4i; typedef Kokkos::View t_ace_1d; typedef Kokkos::View t_ace_2d; + typedef Kokkos::View t_ace_2d_lr; typedef Kokkos::View t_ace_2d3; typedef Kokkos::View t_ace_3d; typedef Kokkos::View tc_ace_3d; typedef Kokkos::View t_ace_3d3; typedef Kokkos::View t_ace_3d4; + typedef Kokkos::View t_ace_3d4_lr; typedef Kokkos::View t_ace_4d; typedef Kokkos::View t_ace_1c; typedef Kokkos::View t_ace_2c; @@ -260,25 +250,16 @@ class PairPACEExtrapolationKokkos : public PairPACEExtrapolation { th_ace_1d h_gamma; // Spherical Harmonics + void pre_compute_harmonics(int); - KOKKOS_INLINE_FUNCTION - void compute_barplm(double rz, int lmaxi); - - KOKKOS_INLINE_FUNCTION - void compute_ylm(double rx, double ry, double rz, int lmaxi); - + t_ace_4c A_sph; + t_ace_1d d_idx_sph; t_ace_1d alm; t_ace_1d blm; t_ace_1d cl; t_ace_1d dl; - t_ace_3d plm; - t_ace_3d dplm; - - t_ace_3c ylm; - t_ace_4c3 dylm; - // short neigh list t_ace_1i d_ncount; t_ace_2d d_mu; @@ -297,20 +278,19 @@ class PairPACEExtrapolationKokkos : public PairPACEExtrapolation { t_ace_1d d_rho_core_cutoff; t_ace_1d d_drho_core_cutoff; t_ace_1d d_E0vals; - t_ace_2d d_wpre; - t_ace_2d d_mexp; + t_ace_2d_lr d_wpre; + t_ace_2d_lr d_mexp; // tilde t_ace_1i d_idx_ms_combs_count; t_ace_1i d_total_basis_size; - t_ace_2i d_rank; - t_ace_2i d_num_ms_combs; - t_ace_2i d_func_inds; - t_ace_3i d_mus; - t_ace_3i d_ns; - t_ace_3i d_ls; - t_ace_3i d_ms_combs; -// t_ace_3d d_ctildes; + t_ace_2i_lr d_rank; + t_ace_2i_lr d_num_ms_combs; + t_ace_2i_lr d_idx_funcs; + t_ace_3i_lr d_mus; + t_ace_3i_lr d_ns; + t_ace_3i_lr 
d_ls; + t_ace_3i_lr d_ms_combs; t_ace_2d d_gen_cgs; t_ace_3d d_coeffs; @@ -321,12 +301,12 @@ class PairPACEExtrapolationKokkos : public PairPACEExtrapolation { int ntot, nlut, num_of_functions; double cutoff, deltaSplineBins, invrscalelookup, rscalelookup; - t_ace_3d4 lookupTable; + t_ace_3d4_lr lookupTable; void operator=(const SplineInterpolator &spline); void deallocate() { - lookupTable = t_ace_3d4(); + lookupTable = t_ace_3d4_lr(); } double memory_usage() { diff --git a/src/KOKKOS/pair_pace_kokkos.cpp b/src/KOKKOS/pair_pace_kokkos.cpp index 805d7f68bb..4046649375 100644 --- a/src/KOKKOS/pair_pace_kokkos.cpp +++ b/src/KOKKOS/pair_pace_kokkos.cpp @@ -29,11 +29,13 @@ #include "neighbor_kokkos.h" #include "neigh_request.h" +#include "ace-evaluator/ace_version.h" +#include "ace-evaluator/ace_radial.h" + #include "ace-evaluator/ace_c_basis.h" #include "ace-evaluator/ace_evaluator.h" #include "ace-evaluator/ace_recursive.h" -#include "ace-evaluator/ace_version.h" -#include "ace-evaluator/ace_radial.h" + #include namespace LAMMPS_NS { @@ -104,18 +106,19 @@ void PairPACEKokkos::grow(int natom, int maxneigh) if ((int)A.extent(0) < natom) { - MemKK::realloc_kokkos(A, "pace:A", natom, nelements, nradmax + 1, (lmax + 1) * (lmax + 1)); + MemKK::realloc_kokkos(A_sph, "pace:A_sph", natom, nelements, idx_sph_max, nradmax + 1); + MemKK::realloc_kokkos(A, "pace:A", natom, nelements, (lmax + 1) * (lmax + 1), nradmax + 1); MemKK::realloc_kokkos(A_rank1, "pace:A_rank1", natom, nelements, nradbase); - MemKK::realloc_kokkos(A_list, "pace:A_list", natom, idx_rho_max, basis_set->rankmax); + MemKK::realloc_kokkos(A_list, "pace:A_list", natom, idx_ms_combs_max, basis_set->rankmax); //size is +1 of max to avoid out-of-boundary array access in double-triangular scheme - MemKK::realloc_kokkos(A_forward_prod, "pace:A_forward_prod", natom, idx_rho_max, basis_set->rankmax + 1); + MemKK::realloc_kokkos(A_forward_prod, "pace:A_forward_prod", natom, idx_ms_combs_max, basis_set->rankmax + 1); 
MemKK::realloc_kokkos(e_atom, "pace:e_atom", natom); MemKK::realloc_kokkos(rhos, "pace:rhos", natom, basis_set->ndensitymax + 1); // +1 density for core repulsion MemKK::realloc_kokkos(dF_drho, "pace:dF_drho", natom, basis_set->ndensitymax + 1); // +1 density for core repulsion - MemKK::realloc_kokkos(weights, "pace:weights", natom, nelements, nradmax + 1, (lmax + 1) * (lmax + 1)); + MemKK::realloc_kokkos(weights, "pace:weights", natom, nelements, idx_sph_max, nradmax + 1); MemKK::realloc_kokkos(weights_rank1, "pace:weights_rank1", natom, nelements, nradbase); // hard-core repulsion @@ -126,14 +129,14 @@ void PairPACEKokkos::grow(int natom, int maxneigh) MemKK::realloc_kokkos(d_jj_min, "pace:j_min_pair", natom); MemKK::realloc_kokkos(d_corerep, "pace:corerep", natom); // per-atom corerep - MemKK::realloc_kokkos(dB_flatten, "pace:dB_flatten", natom, idx_rho_max, basis_set->rankmax); + MemKK::realloc_kokkos(dB_flatten, "pace:dB_flatten", natom, idx_ms_combs_max, basis_set->rankmax); } - if (((int)ylm.extent(0) < natom) || ((int)ylm.extent(1) < maxneigh)) { + if (((int)fr.extent(0) < natom) || ((int)fr.extent(1) < maxneigh)) { // radial functions - MemKK::realloc_kokkos(fr, "pace:fr", natom, maxneigh, nradmax, lmax + 1); - MemKK::realloc_kokkos(dfr, "pace:dfr", natom, maxneigh, nradmax, lmax + 1); + MemKK::realloc_kokkos(fr, "pace:fr", natom, maxneigh, lmax + 1, nradmax); + MemKK::realloc_kokkos(dfr, "pace:dfr", natom, maxneigh, lmax + 1, nradmax); MemKK::realloc_kokkos(gr, "pace:gr", natom, maxneigh, nradbase); MemKK::realloc_kokkos(dgr, "pace:dgr", natom, maxneigh, nradbase); const int max_num_functions = MAX(nradbase, nradmax*(lmax + 1)); @@ -144,12 +147,6 @@ void PairPACEKokkos::grow(int natom, int maxneigh) MemKK::realloc_kokkos(cr, "pace:cr", natom, maxneigh); MemKK::realloc_kokkos(dcr, "pace:dcr", natom, maxneigh); - // spherical harmonics - MemKK::realloc_kokkos(plm, "pace:plm", natom, maxneigh, (lmax + 1) * (lmax + 1)); - MemKK::realloc_kokkos(dplm, 
"pace:dplm", natom, maxneigh, (lmax + 1) * (lmax + 1)); - MemKK::realloc_kokkos(ylm, "pace:ylm", natom, maxneigh, (lmax + 1) * (lmax + 1)); - MemKK::realloc_kokkos(dylm, "pace:dylm", natom, maxneigh, (lmax + 1) * (lmax + 1)); - // short neigh list MemKK::realloc_kokkos(d_ncount, "pace:ncount", natom); MemKK::realloc_kokkos(d_mu, "pace:mu", natom, maxneigh); @@ -184,7 +181,7 @@ void PairPACEKokkos::copy_pertype() h_rho_core_cutoff[n] = basis_set->map_embedding_specifications.at(n).rho_core_cutoff; h_drho_core_cutoff[n] = basis_set->map_embedding_specifications.at(n).drho_core_cutoff; - h_E0vals(n)= basis_set->E0vals(n); + h_E0vals(n) = basis_set->E0vals(n); h_ndensity(n) = basis_set->map_embedding_specifications.at(n).ndensity; @@ -225,10 +222,10 @@ void PairPACEKokkos::copy_pertype() auto h_dcut_in = Kokkos::create_mirror_view(d_dcut_in); for (int mu_i = 0; mu_i < nelements; ++mu_i) { - for (int mu_j = 0; mu_j < nelements; ++mu_j) { - h_cut_in(mu_i,mu_j) = basis_set->map_bond_specifications.at({mu_i,mu_j}).rcut_in; - h_dcut_in(mu_i,mu_j) = basis_set->map_bond_specifications.at({mu_i,mu_j}).dcut_in; - } + for (int mu_j = 0; mu_j < nelements; ++mu_j) { + h_cut_in(mu_i,mu_j) = basis_set->map_bond_specifications.at({mu_i,mu_j}).rcut_in; + h_dcut_in(mu_i,mu_j) = basis_set->map_bond_specifications.at({mu_i,mu_j}).dcut_in; + } } Kokkos::deep_copy(d_cut_in, h_cut_in); Kokkos::deep_copy(d_dcut_in, h_dcut_in); @@ -288,50 +285,50 @@ void PairPACEKokkos::copy_tilde() // flatten loops, get per-element count and max - idx_rho_max = 0; + idx_ms_combs_max = 0; int total_basis_size_max = 0; - MemKK::realloc_kokkos(d_idx_rho_count, "pace:idx_rho_count", nelements); - auto h_idx_rho_count = Kokkos::create_mirror_view(d_idx_rho_count); + MemKK::realloc_kokkos(d_idx_ms_combs_count, "pace:idx_ms_combs_count", nelements); + auto h_idx_ms_combs_count = Kokkos::create_mirror_view(d_idx_ms_combs_count); - for (int n = 0; n < nelements; n++) { - int idx_rho = 0; - const int 
total_basis_size_rank1 = basis_set->total_basis_size_rank1[n]; - const int total_basis_size = basis_set->total_basis_size[n]; + for (int mu = 0; mu < nelements; mu++) { + int idx_ms_combs = 0; + const int total_basis_size_rank1 = basis_set->total_basis_size_rank1[mu]; + const int total_basis_size = basis_set->total_basis_size[mu]; - ACECTildeBasisFunction *basis = basis_set->basis[n]; + ACECTildeBasisFunction *basis = basis_set->basis[mu]; // rank=1 for (int func_rank1_ind = 0; func_rank1_ind < total_basis_size_rank1; ++func_rank1_ind) - idx_rho++; + idx_ms_combs++; // rank > 1 - for (int func_ind = 0; func_ind < total_basis_size; ++func_ind) { - ACECTildeBasisFunction *func = &basis[func_ind]; + for (int idx_func = 0; idx_func < total_basis_size; ++idx_func) { + ACECTildeBasisFunction *func = &basis[idx_func]; // loop over {ms} combinations in sum for (int ms_ind = 0; ms_ind < func->num_ms_combs; ++ms_ind) - idx_rho++; + idx_ms_combs++; } - h_idx_rho_count(n) = idx_rho; - idx_rho_max = MAX(idx_rho_max, idx_rho); + h_idx_ms_combs_count(mu) = idx_ms_combs; + idx_ms_combs_max = MAX(idx_ms_combs_max, idx_ms_combs); total_basis_size_max = MAX(total_basis_size_max, total_basis_size_rank1 + total_basis_size); } - Kokkos::deep_copy(d_idx_rho_count, h_idx_rho_count); + Kokkos::deep_copy(d_idx_ms_combs_count, h_idx_ms_combs_count); MemKK::realloc_kokkos(d_rank, "pace:rank", nelements, total_basis_size_max); MemKK::realloc_kokkos(d_num_ms_combs, "pace:num_ms_combs", nelements, total_basis_size_max); - MemKK::realloc_kokkos(d_offsets, "pace:offsets", nelements, idx_rho_max); + MemKK::realloc_kokkos(d_idx_funcs, "pace:idx_func", nelements, idx_ms_combs_max); MemKK::realloc_kokkos(d_mus, "pace:mus", nelements, total_basis_size_max, basis_set->rankmax); MemKK::realloc_kokkos(d_ns, "pace:ns", nelements, total_basis_size_max, basis_set->rankmax); MemKK::realloc_kokkos(d_ls, "pace:ls", nelements, total_basis_size_max, basis_set->rankmax); - MemKK::realloc_kokkos(d_ms_combs, 
"pace:ms_combs", nelements, idx_rho_max, basis_set->rankmax); - MemKK::realloc_kokkos(d_ctildes, "pace:ctildes", nelements, idx_rho_max, basis_set->ndensitymax); + MemKK::realloc_kokkos(d_ms_combs, "pace:ms_combs", nelements, idx_ms_combs_max, basis_set->rankmax); + MemKK::realloc_kokkos(d_ctildes, "pace:ctildes", nelements, idx_ms_combs_max, basis_set->ndensitymax); auto h_rank = Kokkos::create_mirror_view(d_rank); auto h_num_ms_combs = Kokkos::create_mirror_view(d_num_ms_combs); - auto h_offsets = Kokkos::create_mirror_view(d_offsets); + auto h_idx_funcs = Kokkos::create_mirror_view(d_idx_funcs); auto h_mus = Kokkos::create_mirror_view(d_mus); auto h_ns = Kokkos::create_mirror_view(d_ns); auto h_ls = Kokkos::create_mirror_view(d_ls); @@ -340,63 +337,66 @@ void PairPACEKokkos::copy_tilde() // copy values on host - for (int n = 0; n < nelements; n++) { - const int total_basis_size_rank1 = basis_set->total_basis_size_rank1[n]; - const int total_basis_size = basis_set->total_basis_size[n]; + for (int mu = 0; mu < nelements; mu++) { + const int total_basis_size_rank1 = basis_set->total_basis_size_rank1[mu]; + const int total_basis_size = basis_set->total_basis_size[mu]; - ACECTildeBasisFunction *basis_rank1 = basis_set->basis_rank1[n]; - ACECTildeBasisFunction *basis = basis_set->basis[n]; + ACECTildeBasisFunction *basis_rank1 = basis_set->basis_rank1[mu]; + ACECTildeBasisFunction *basis = basis_set->basis[mu]; - const int ndensity = basis_set->map_embedding_specifications.at(n).ndensity; + const int ndensity = basis_set->map_embedding_specifications.at(mu).ndensity; - int idx_rho = 0; + int idx_ms_combs = 0; // rank=1 - for (int offset = 0; offset < total_basis_size_rank1; ++offset) { - ACECTildeBasisFunction *func = &basis_rank1[offset]; - h_rank(n, offset) = 1; - h_mus(n, offset, 0) = func->mus[0]; - h_ns(n, offset, 0) = func->ns[0]; - for (int p = 0; p < ndensity; p++) - h_ctildes(n, idx_rho, p) = func->ctildes[p]; - h_offsets(n, idx_rho) = offset; - idx_rho++; + 
for (int idx_func = 0; idx_func < total_basis_size_rank1; ++idx_func) { + ACECTildeBasisFunction *func = &basis_rank1[idx_func]; + h_rank(mu, idx_func) = 1; + h_mus(mu, idx_func, 0) = func->mus[0]; + h_ns(mu, idx_func, 0) = func->ns[0]; + + for (int p = 0; p < ndensity; ++p) + h_ctildes(mu, idx_ms_combs, p) = func->ctildes[p]; + + h_idx_funcs(mu, idx_ms_combs) = idx_func; + idx_ms_combs++; } // rank > 1 - for (int func_ind = 0; func_ind < total_basis_size; ++func_ind) { - ACECTildeBasisFunction *func = &basis[func_ind]; + for (int idx_func = 0; idx_func < total_basis_size; ++idx_func) { + ACECTildeBasisFunction *func = &basis[idx_func]; // TODO: check if func->ctildes are zero, then skip - const int offset = total_basis_size_rank1 + func_ind; + const int idx_func_through = total_basis_size_rank1 + idx_func; - const int rank = h_rank(n, offset) = func->rank; - h_num_ms_combs(n, offset) = func->num_ms_combs; + const int rank = h_rank(mu, idx_func_through) = func->rank; + h_num_ms_combs(mu, idx_func_through) = func->num_ms_combs; for (int t = 0; t < rank; t++) { - h_mus(n, offset, t) = func->mus[t]; - h_ns(n, offset, t) = func->ns[t]; - h_ls(n, offset, t) = func->ls[t]; + h_mus(mu, idx_func_through, t) = func->mus[t]; + h_ns(mu, idx_func_through, t) = func->ns[t]; + h_ls(mu, idx_func_through, t) = func->ls[t]; } // loop over {ms} combinations in sum for (int ms_ind = 0; ms_ind < func->num_ms_combs; ++ms_ind) { auto ms = &func->ms_combs[ms_ind * rank]; // current ms-combination (of length = rank) for (int t = 0; t < rank; t++) - h_ms_combs(n, idx_rho, t) = ms[t]; + h_ms_combs(mu, idx_ms_combs, t) = ms[t]; for (int p = 0; p < ndensity; ++p) { // real-part only multiplication - h_ctildes(n, idx_rho, p) = func->ctildes[ms_ind * ndensity + p]; + h_ctildes(mu, idx_ms_combs, p) = func->ctildes[ms_ind * ndensity + p]; } - h_offsets(n, idx_rho) = offset; - idx_rho++; + + h_idx_funcs(mu, idx_ms_combs) = idx_func_through; + idx_ms_combs++; } } } Kokkos::deep_copy(d_rank, 
h_rank); Kokkos::deep_copy(d_num_ms_combs, h_num_ms_combs); - Kokkos::deep_copy(d_offsets, h_offsets); + Kokkos::deep_copy(d_idx_funcs, h_idx_funcs); Kokkos::deep_copy(d_mus, h_mus); Kokkos::deep_copy(d_ns, h_ns); Kokkos::deep_copy(d_ls, h_ls); @@ -443,6 +443,7 @@ void PairPACEKokkos::init_style() // spherical harmonics + MemKK::realloc_kokkos(d_idx_sph, "pace:idx_sph", (lmax + 1) * (lmax + 1)); MemKK::realloc_kokkos(alm, "pace:alm", (lmax + 1) * (lmax + 1)); MemKK::realloc_kokkos(blm, "pace:blm", (lmax + 1) * (lmax + 1)); MemKK::realloc_kokkos(cl, "pace:cl", lmax + 1); @@ -613,7 +614,7 @@ void PairPACEKokkos::compute(int eflag_in, int vflag_in) Kokkos::deep_copy(weights, 0.0); Kokkos::deep_copy(weights_rank1, 0.0); - Kokkos::deep_copy(A, 0.0); + Kokkos::deep_copy(A_sph, 0.0); Kokkos::deep_copy(A_rank1, 0.0); Kokkos::deep_copy(rhos, 0.0); Kokkos::deep_copy(rho_core, 0.0); @@ -646,15 +647,6 @@ void PairPACEKokkos::compute(int eflag_in, int vflag_in) Kokkos::parallel_for("ComputeRadial",policy_radial,*this); } - //ComputeYlm - { - int vector_length = vector_length_default; - int team_size = 16; - check_team_size_for(((chunk_size+team_size-1)/team_size)*maxneigh,team_size,vector_length); - typename Kokkos::TeamPolicy policy_ylm(((chunk_size+team_size-1)/team_size)*maxneigh,team_size,vector_length); - Kokkos::parallel_for("ComputeYlm",policy_ylm,*this); - } - //ComputeAi { int vector_length = vector_length_default; @@ -672,7 +664,7 @@ void PairPACEKokkos::compute(int eflag_in, int vflag_in) //ComputeRho { - typename Kokkos::RangePolicy policy_rho(0,chunk_size*idx_rho_max); + typename Kokkos::RangePolicy policy_rho(0,chunk_size*idx_ms_combs_max); Kokkos::parallel_for("ComputeRho",policy_rho,*this); } @@ -684,7 +676,7 @@ void PairPACEKokkos::compute(int eflag_in, int vflag_in) //ComputeWeights { - typename Kokkos::RangePolicy policy_weights(0,chunk_size*idx_rho_max); + typename Kokkos::RangePolicy policy_weights(0,chunk_size * idx_ms_combs_max); 
Kokkos::parallel_for("ComputeWeights",policy_weights,*this); } @@ -693,7 +685,7 @@ void PairPACEKokkos::compute(int eflag_in, int vflag_in) int vector_length = vector_length_default; int team_size = team_size_default; check_team_size_for(((chunk_size+team_size-1)/team_size)*maxneigh,team_size,vector_length); - typename Kokkos::TeamPolicy policy_derivative(((chunk_size+team_size-1)/team_size)*maxneigh,team_size,vector_length); + typename Kokkos::TeamPolicy policy_derivative(((chunk_size+team_size-1)/team_size)*maxneigh,team_size,vector_length); Kokkos::parallel_for("ComputeDerivative",policy_derivative,*this); } @@ -726,7 +718,6 @@ void PairPACEKokkos::compute(int eflag_in, int vflag_in) } chunk_offset += chunk_size; - } // end while if (need_dup) @@ -842,25 +833,24 @@ void PairPACEKokkos::operator() (TagPairPACEComputeNeigh,const typen }); if (is_zbl) { - //adapted from https://www.osti.gov/servlets/purl/1429450 - if(ncount>0) { - using minloc_value_type=Kokkos::MinLoc::value_type; - minloc_value_type djjmin; - djjmin.val=1e20; - djjmin.loc=-1; - Kokkos::MinLoc reducer_scalar(djjmin); - // loop over ncount (actual neighbours withing cutoff) rather than jnum (total number of neigh in cutoff+skin) - Kokkos::parallel_reduce(Kokkos::TeamThreadRange(team, ncount), + //adapted from https://www.osti.gov/servlets/purl/1429450 + if (ncount > 0) { + using minloc_value_type=Kokkos::MinLoc::value_type; + minloc_value_type djjmin; + djjmin.val=1e20; + djjmin.loc=-1; + Kokkos::MinLoc reducer_scalar(djjmin); + // loop over ncount (actual neighbours withing cutoff) rather than jnum (total number of neigh in cutoff+skin) + Kokkos::parallel_reduce(Kokkos::TeamThreadRange(team, ncount), [&](const int offset, minloc_value_type &min_d_dist) { int j = d_nearest(ii,offset); j &= NEIGHMASK; - const int jtype = type(j); auto r = d_rnorms(ii,offset); const int mu_j = d_map(type(j)); const F_FLOAT d = r - (d_cut_in(mu_i, mu_j) - d_dcut_in(mu_i, mu_j)); if (d < min_d_dist.val) { - 
min_d_dist.val = d; - min_d_dist.loc = offset; + min_d_dist.val = d; + min_d_dist.loc = offset; } }, reducer_scalar); d_d_min(ii) = djjmin.val; @@ -898,28 +888,6 @@ void PairPACEKokkos::operator() (TagPairPACEComputeRadial, const typ /* ---------------------------------------------------------------------- */ -template -KOKKOS_INLINE_FUNCTION -void PairPACEKokkos::operator() (TagPairPACEComputeYlm, const typename Kokkos::TeamPolicy::member_type& team) const -{ - // Extract the atom number - int ii = team.team_rank() + team.team_size() * (team.league_rank() % - ((chunk_size+team.team_size()-1)/team.team_size())); - if (ii >= chunk_size) return; - - // Extract the neighbor number - const int jj = team.league_rank() / ((chunk_size+team.team_size()-1)/team.team_size()); - const int ncount = d_ncount(ii); - if (jj >= ncount) return; - - const double xn = d_rhats(ii, jj, 0); - const double yn = d_rhats(ii, jj, 1); - const double zn = d_rhats(ii, jj, 2); - compute_ylm(ii,jj,xn,yn,zn,lmax); -} - -/* ---------------------------------------------------------------------- */ - template KOKKOS_INLINE_FUNCTION void PairPACEKokkos::operator() (TagPairPACEComputeAi, const typename Kokkos::TeamPolicy::member_type& team) const @@ -941,13 +909,127 @@ void PairPACEKokkos::operator() (TagPairPACEComputeAi, const typenam Kokkos::atomic_add(&A_rank1(ii, mu_j, n), gr(ii, jj, n) * Y00); // rank > 1 - for (int n = 0; n < nradmax; n++) { - for (int l = 0; l <= lmax; l++) { - for (int m = 0; m <= l; m++) { - const int idx = l * (l + 1) + m; // (l, m) - Kokkos::atomic_add(&A(ii, mu_j, n, idx).re, fr(ii, jj, n, l) * ylm(ii, jj, idx).re); - Kokkos::atomic_add(&A(ii, mu_j, n, idx).im, fr(ii, jj, n, l) * ylm(ii, jj, idx).im); + + // Compute plm and ylm + + // requires rx^2 + ry^2 + rz^2 = 1 , NO CHECKING IS PERFORMED !!!!!!!!! + // requires -1 <= rz <= 1 , NO CHECKING IS PERFORMED !!!!!!!!! 
+ // prefactors include 1/sqrt(2) factor compared to reference + + complex ylm, phase; + complex phasem, mphasem1; + complex dyx, dyy, dyz; + complex rdy; + + const double rx = d_rhats(ii, jj, 0); + const double ry = d_rhats(ii, jj, 1); + const double rz = d_rhats(ii, jj, 2); + + phase.re = rx; + phase.im = ry; + + double plm_idx,plm_idx1,plm_idx2; + + plm_idx = plm_idx1 = plm_idx2 = 0.0; + + int idx_sph = 0; + + // m = 0 + for (int l = 0; l <= lmax; l++) { + // const int idx = l * (l + 1); + + if (l == 0) { + // l=0, m=0 + // plm[0] = Y00/sq1o4pi; //= sq1o4pi; + plm_idx = Y00; //= 1; + } else if (l == 1) { + // l=1, m=0 + plm_idx = Y00 * sq3 * rz; + } else { + // l>=2, m=0 + plm_idx = alm(idx_sph) * (rz * plm_idx1 + blm(idx_sph) * plm_idx2); + } + + ylm.re = plm_idx; + ylm.im = 0.0; + + for (int n = 0; n < nradmax; n++) { + Kokkos::atomic_add(&A_sph(ii, mu_j, idx_sph, n).re, fr(ii, jj, l, n) * ylm.re); + Kokkos::atomic_add(&A_sph(ii, mu_j, idx_sph, n).im, fr(ii, jj, l, n) * ylm.im); + } + + plm_idx2 = plm_idx1; + plm_idx1 = plm_idx; + + idx_sph++; + } + + plm_idx = plm_idx1 = plm_idx2 = 0.0; + + // m = 1 + for (int l = 1; l <= lmax; l++) { + // const int idx = l * (l + 1) + 1; // (l, 1) + + if (l == 1) { + // l=1, m=1 + plm_idx = -sq3o2 * Y00; + } else if (l == 2) { + const double t = dl(l) * plm_idx1; + plm_idx = t * rz; + } else { + plm_idx = alm(idx_sph) * (rz * plm_idx1 + blm(idx_sph) * plm_idx2); + } + + ylm = phase * plm_idx; + + for (int n = 0; n < nradmax; n++) { + Kokkos::atomic_add(&A_sph(ii, mu_j, idx_sph, n).re, fr(ii, jj, l, n) * ylm.re); + Kokkos::atomic_add(&A_sph(ii, mu_j, idx_sph, n).im, fr(ii, jj, l, n) * ylm.im); + } + + plm_idx2 = plm_idx1; + plm_idx1 = plm_idx; + + idx_sph++; + } + + plm_idx = plm_idx1 = plm_idx2 = 0.0; + + double plm_mm1_mm1 = -sq3o2 * Y00; // (1, 1) + + // m > 1 + phasem = phase; + for (int m = 2; m <= lmax; m++) { + + mphasem1.re = phasem.re * double(m); + mphasem1.im = phasem.im * double(m); + phasem = phasem * phase; + + 
for (int l = m; l <= lmax; l++) { + // const int idx = l * (l + 1) + m; + + if (l == m) { + plm_idx = cl(l) * plm_mm1_mm1; // (m+1, m) + plm_mm1_mm1 = plm_idx; + } else if (l == (m + 1)) { + const double t = dl(l) * plm_mm1_mm1; // (m - 1, m - 1) + plm_idx = t * rz; // (m, m) + } else { + plm_idx = alm(idx_sph) * (rz * plm_idx1 + blm(idx_sph) * plm_idx2); } + + ylm.re = phasem.re * plm_idx; + ylm.im = phasem.im * plm_idx; + + for (int n = 0; n < nradmax; n++) { + Kokkos::atomic_add(&A_sph(ii, mu_j, idx_sph, n).re, fr(ii, jj, l, n) * ylm.re); + Kokkos::atomic_add(&A_sph(ii, mu_j, idx_sph, n).im, fr(ii, jj, l, n) * ylm.im); + } + + plm_idx2 = plm_idx1; + plm_idx1 = plm_idx; + + idx_sph++; } } @@ -961,17 +1043,35 @@ template KOKKOS_INLINE_FUNCTION void PairPACEKokkos::operator() (TagPairPACEConjugateAi, const int& ii) const { - //complex conjugate A's (for NEGATIVE (-m) terms) - // for rank > 1 for (int mu_j = 0; mu_j < nelements; mu_j++) { - for (int n = 0; n < nradmax; n++) { - for (int l = 0; l <= lmax; l++) { + + // transpose + + int idx_sph = 0; + + for (int m = 0; m <= lmax; m++) { + for (int l = m; l <= lmax; l++) { + const int idx = l * (l + 1) + m; + for (int n = 0; n < nradmax; n++) { + A(ii, mu_j, idx, n) = A_sph(ii, mu_j, idx_sph, n); + } + + idx_sph++; + } + } + + // complex conjugate A's (for NEGATIVE (-m) terms) + // for rank > 1 + + for (int l = 0; l <= lmax; l++) { //fill in -m part in the outer loop using the same m <-> -m symmetry as for Ylm - for (int m = 1; m <= l; m++) { - const int idx = l * (l + 1) + m; // (l, m) - const int idxm = l * (l + 1) - m; // (l, -m) - const int factor = m % 2 == 0 ? 1 : -1; - A(ii, mu_j, n, idxm) = A(ii, mu_j, n, idx).conj() * (double)factor; + for (int m = 1; m <= l; m++) { + const int idx = l * (l + 1) + m; // (l, m) + const int idxm = l * (l + 1) - m; // (l, -m) + const int idx_sph = d_idx_sph(idx); + const int factor = m % 2 == 0 ? 
1 : -1; + for (int n = 0; n < nradmax; n++) { + A(ii, mu_j, idxm, n) = A_sph(ii, mu_j, idx_sph, n).conj() * (double)factor; } } } @@ -984,70 +1084,69 @@ template KOKKOS_INLINE_FUNCTION void PairPACEKokkos::operator() (TagPairPACEComputeRho, const int& iter) const { - const int idx_rho = iter / chunk_size; + const int idx_ms_combs = iter / chunk_size; const int ii = iter % chunk_size; const int i = d_ilist[ii + chunk_offset]; const int mu_i = d_map(type(i)); - if (idx_rho >= d_idx_rho_count(mu_i)) return; + if (idx_ms_combs >= d_idx_ms_combs_count(mu_i)) return; const int ndensity = d_ndensity(mu_i); - const int offset = d_offsets(mu_i, idx_rho); - const int rank = d_rank(mu_i, offset); + const int idx_func = d_idx_funcs(mu_i, idx_ms_combs); + const int rank = d_rank(mu_i, idx_func); const int r = rank - 1; // Basis functions B with iterative product and density rho(p) calculation if (rank == 1) { - const int mu = d_mus(mu_i, offset, 0); - const int n = d_ns(mu_i, offset, 0); + const int mu = d_mus(mu_i, idx_func, 0); + const int n = d_ns(mu_i, idx_func, 0); double A_cur = A_rank1(ii, mu, n - 1); for (int p = 0; p < ndensity; ++p) { //for rank=1 (r=0) only 1 ms-combination exists (ms_ind=0), so index of func.ctildes is 0..ndensity-1 - Kokkos::atomic_add(&rhos(ii, p), d_ctildes(mu_i, idx_rho, p) * A_cur); + Kokkos::atomic_add(&rhos(ii, p), d_ctildes(mu_i, idx_ms_combs, p) * A_cur); } } else { // rank > 1 // loop over {ms} combinations in sum // loop over m, collect B = product of A with given ms - A_forward_prod(ii, idx_rho, 0) = complex::one(); + A_forward_prod(ii, idx_ms_combs, 0) = complex::one(); // fill forward A-product triangle for (int t = 0; t < rank; t++) { //TODO: optimize ns[t]-1 -> ns[t] during functions construction - const int mu = d_mus(mu_i, offset, t); - const int n = d_ns(mu_i, offset, t); - const int l = d_ls(mu_i, offset, t); - const int m = d_ms_combs(mu_i, idx_rho, t); // current ms-combination (of length = rank) + const int mu = d_mus(mu_i, 
idx_func, t); + const int n = d_ns(mu_i, idx_func, t); + const int l = d_ls(mu_i, idx_func, t); + const int m = d_ms_combs(mu_i, idx_ms_combs, t); // current ms-combination (of length = rank) const int idx = l * (l + 1) + m; // (l, m) - A_list(ii, idx_rho, t) = A(ii, mu, n - 1, idx); - A_forward_prod(ii, idx_rho, t + 1) = A_forward_prod(ii, idx_rho, t) * A_list(ii, idx_rho, t); + A_list(ii, idx_ms_combs, t) = A(ii, mu, idx, n - 1); + A_forward_prod(ii, idx_ms_combs, t + 1) = A_forward_prod(ii, idx_ms_combs, t) * A_list(ii, idx_ms_combs, t); } complex A_backward_prod = complex::one(); // fill backward A-product triangle for (int t = r; t >= 1; t--) { - const complex dB = A_forward_prod(ii, idx_rho, t) * A_backward_prod; // dB - product of all A's except t-th - dB_flatten(ii, idx_rho, t) = dB; + const complex dB = A_forward_prod(ii, idx_ms_combs, t) * A_backward_prod; // dB - product of all A's except t-th + dB_flatten(ii, idx_ms_combs, t) = dB; - A_backward_prod = A_backward_prod * A_list(ii, idx_rho, t); + A_backward_prod = A_backward_prod * A_list(ii, idx_ms_combs, t); } - dB_flatten(ii, idx_rho, 0) = A_forward_prod(ii, idx_rho, 0) * A_backward_prod; + dB_flatten(ii, idx_ms_combs, 0) = A_forward_prod(ii, idx_ms_combs, 0) * A_backward_prod; - const complex B = A_forward_prod(ii, idx_rho, rank); + const complex B = A_forward_prod(ii, idx_ms_combs, rank); for (int p = 0; p < ndensity; ++p) { // real-part only multiplication - Kokkos::atomic_add(&rhos(ii, p), B.real_part_product(d_ctildes(mu_i, idx_rho, p))); + Kokkos::atomic_add(&rhos(ii, p), B.real_part_product(d_ctildes(mu_i, idx_ms_combs, p))); } } } /* ---------------------------------------------------------------------- */ - template KOKKOS_INLINE_FUNCTION void PairPACEKokkos::operator() (TagPairPACEComputeFS, const int& ii) const @@ -1064,34 +1163,35 @@ void PairPACEKokkos::operator() (TagPairPACEComputeFS, const int& ii evdwl = fcut = dfcut = 0.0; FS_values_and_derivatives(ii, evdwl, mu_i); + if (is_zbl) { - 
if (d_jj_min(ii) != -1) { - const int mu_jmin = d_mu(ii,d_jj_min(ii)); - F_FLOAT dcutin = d_dcut_in(mu_i, mu_jmin); - F_FLOAT transition_coordinate = dcutin - d_d_min(ii); // == cutin - r_min - cutoff_func_poly(transition_coordinate, dcutin, dcutin, fcut, dfcut); - dfcut = -dfcut; // invert, because rho_core = cutin - r_min - } else { - // no neighbours - fcut = 1; - dfcut = 0; - } - evdwl_cut = evdwl * fcut + rho_core(ii) * (1 - fcut); // evdwl * fcut + rho_core_uncut - rho_core_uncut* fcut - dF_drho_core(ii) = 1 - fcut; - dF_dfcut(ii) = evdwl * dfcut - rho_core(ii) * dfcut; + if (d_jj_min(ii) != -1) { + const int mu_jmin = d_mu(ii,d_jj_min(ii)); + F_FLOAT dcutin = d_dcut_in(mu_i, mu_jmin); + F_FLOAT transition_coordinate = dcutin - d_d_min(ii); // == cutin - r_min + cutoff_func_poly(transition_coordinate, dcutin, dcutin, fcut, dfcut); + dfcut = -dfcut; // invert, because rho_core = cutin - r_min + } else { + // no neighbours + fcut = 1; + dfcut = 0; + } + evdwl_cut = evdwl * fcut + rho_core(ii) * (1 - fcut); // evdwl * fcut + rho_core_uncut - rho_core_uncut* fcut + dF_drho_core(ii) = 1 - fcut; + dF_dfcut(ii) = evdwl * dfcut - rho_core(ii) * dfcut; } else { - inner_cutoff(rho_core(ii), rho_cut, drho_cut, fcut, dfcut); - dF_drho_core(ii) = evdwl * dfcut + 1; - evdwl_cut = evdwl * fcut + rho_core(ii); + inner_cutoff(rho_core(ii), rho_cut, drho_cut, fcut, dfcut); + dF_drho_core(ii) = evdwl * dfcut + 1; + evdwl_cut = evdwl * fcut + rho_core(ii); } for (int p = 0; p < ndensity; ++p) - dF_drho(ii, p) *= fcut; + dF_drho(ii, p) *= fcut; // tally energy contribution if (eflag) { - // E0 shift - evdwl_cut += d_E0vals(mu_i); - e_atom(ii) = evdwl_cut; + // E0 shift + evdwl_cut += d_E0vals(mu_i); + e_atom(ii) = evdwl_cut; } if (flag_corerep_factor) @@ -1104,52 +1204,58 @@ template KOKKOS_INLINE_FUNCTION void PairPACEKokkos::operator() (TagPairPACEComputeWeights, const int& iter) const { - const int idx_rho = iter / chunk_size; + const int idx_ms_combs = iter / chunk_size; 
const int ii = iter % chunk_size; const int i = d_ilist[ii + chunk_offset]; const int mu_i = d_map(type(i)); - if (idx_rho >= d_idx_rho_count(mu_i)) return; + if (idx_ms_combs >= d_idx_ms_combs_count(mu_i)) return; const int ndensity = d_ndensity(mu_i); - const int offset = d_offsets(mu_i, idx_rho); - const int rank = d_rank(mu_i, offset); + const int idx_func = d_idx_funcs(mu_i, idx_ms_combs); + const int rank = d_rank(mu_i, idx_func); // Weights and theta calculation if (rank == 1) { - const int mu = d_mus(mu_i, offset, 0); - const int n = d_ns(mu_i, offset, 0); + const int mu = d_mus(mu_i, idx_func, 0); + const int n = d_ns(mu_i, idx_func, 0); double theta = 0.0; for (int p = 0; p < ndensity; ++p) { // for rank=1 (r=0) only 1 ms-combination exists (ms_ind=0), so index of func.ctildes is 0..ndensity-1 - theta += dF_drho(ii, p) * d_ctildes(mu_i, idx_rho, p); + theta += dF_drho(ii, p) * d_ctildes(mu_i, idx_ms_combs, p); } Kokkos::atomic_add(&weights_rank1(ii, mu, n - 1), theta); } else { // rank > 1 double theta = 0.0; for (int p = 0; p < ndensity; ++p) - theta += dF_drho(ii, p) * d_ctildes(mu_i, idx_rho, p); + theta += dF_drho(ii, p) * d_ctildes(mu_i, idx_ms_combs, p); theta *= 0.5; // 0.5 factor due to possible double counting ??? for (int t = 0; t < rank; ++t) { - const int m_t = d_ms_combs(mu_i, idx_rho, t); + const int m_t = d_ms_combs(mu_i, idx_ms_combs, t); const int factor = (m_t % 2 == 0 ? 
1 : -1); - const complex dB = dB_flatten(ii, idx_rho, t); - const int mu_t = d_mus(mu_i, offset, t); - const int n_t = d_ns(mu_i, offset, t); - const int l_t = d_ls(mu_i, offset, t); + const complex dB = dB_flatten(ii, idx_ms_combs, t); + const int mu_t = d_mus(mu_i, idx_func, t); + const int n_t = d_ns(mu_i, idx_func, t); + const int l_t = d_ls(mu_i, idx_func, t); const int idx = l_t * (l_t + 1) + m_t; // (l, m) - const complex value = theta * dB; - Kokkos::atomic_add(&(weights(ii, mu_t, n_t - 1, idx).re), value.re); - Kokkos::atomic_add(&(weights(ii, mu_t, n_t - 1, idx).im), value.im); + const int idx_sph = d_idx_sph(idx); + if (idx_sph >= 0) { + const complex value = theta * dB; + Kokkos::atomic_add(&(weights(ii, mu_t, idx_sph, n_t - 1).re), value.re); + Kokkos::atomic_add(&(weights(ii, mu_t, idx_sph, n_t - 1).im), value.im); + } // update -m_t (that could also be positive), because the basis is half_basis const int idxm = l_t * (l_t + 1) - m_t; // (l, -m) - const complex valuem = theta * dB.conj() * (double)factor; - Kokkos::atomic_add(&(weights(ii, mu_t, n_t - 1, idxm).re), valuem.re); - Kokkos::atomic_add(&(weights(ii, mu_t, n_t - 1, idxm).im), valuem.im); + const int idxm_sph = d_idx_sph(idxm); + if (idxm_sph >= 0) { + const complex valuem = theta * dB.conj() * (double)factor; + Kokkos::atomic_add(&(weights(ii, mu_t, idxm_sph, n_t - 1).re), valuem.re); + Kokkos::atomic_add(&(weights(ii, mu_t, idxm_sph, n_t - 1).im), valuem.im); + } } } } @@ -1196,37 +1302,239 @@ void PairPACEKokkos::operator() (TagPairPACEComputeDerivative, const } // for rank > 1 - for (int n = 0; n < nradmax; n++) { - for (int l = 0; l <= lmax; l++) { - const double R_over_r = fr(ii, jj, n, l) * rinv; - const double DR = dfr(ii, jj, n, l); - // for m >= 0 - for (int m = 0; m <= l; m++) { - const int idx = l * (l + 1) + m; // (l, m) - complex w = weights(ii, mu_j, n, idx); + // compute plm, dplm, ylm and dylm + // requires rx^2 + ry^2 + rz^2 = 1 , NO CHECKING IS PERFORMED !!!!!!!!! 
+ // requires -1 <= rz <= 1 , NO CHECKING IS PERFORMED !!!!!!!!! + // prefactors include 1/sqrt(2) factor compared to reference + + complex ylm,dylm[3]; + complex phase; + complex phasem, mphasem1; + complex dyx, dyy, dyz; + complex rdy; + + const double rx = d_rhats(ii, jj, 0); + const double ry = d_rhats(ii, jj, 1); + const double rz = d_rhats(ii, jj, 2); + + phase.re = rx; + phase.im = ry; + + double plm_idx,plm_idx1,plm_idx2; + double dplm_idx,dplm_idx1,dplm_idx2; + + plm_idx = plm_idx1 = plm_idx2 = 0.0; + dplm_idx = dplm_idx1 = dplm_idx2 = 0.0; + + int idx_sph = 0; + + // m = 0 + for (int l = 0; l <= lmax; l++) { + // const int idx = l * (l + 1); + + if (l == 0) { + // l=0, m=0 + // plm[0] = Y00/sq1o4pi; //= sq1o4pi; + plm_idx = Y00; //= 1; + dplm_idx = 0.0; + } else if (l == 1) { + // l=1, m=0 + plm_idx = Y00 * sq3 * rz; + dplm_idx = Y00 * sq3; + } else { + // l>=2, m=0 + plm_idx = alm(idx_sph) * (rz * plm_idx1 + blm(idx_sph) * plm_idx2); + dplm_idx = alm(idx_sph) * (plm_idx1 + rz * dplm_idx1 + blm(idx_sph) * dplm_idx2); + } + + ylm.re = plm_idx; + ylm.im = 0.0; + + dyz.re = dplm_idx; + rdy.re = dyz.re * rz; + + dylm[0].re = -rdy.re * rx; + dylm[0].im = 0.0; + dylm[1].re = -rdy.re * ry; + dylm[1].im = 0.0; + dylm[2].re = dyz.re - rdy.re * rz; + dylm[2].im = 0; + + for (int n = 0; n < nradmax; n++) { + + const double R_over_r = fr(ii, jj, l, n) * rinv; + const double DR = dfr(ii, jj, l, n); + const complex Y_DR = ylm * DR; + + complex w = weights(ii, mu_j, idx_sph, n); + if (w.re == 0.0 && w.im == 0.0) continue; + + complex grad_phi_nlm[3]; + grad_phi_nlm[0] = Y_DR * r_hat[0] + dylm[0] * R_over_r; + grad_phi_nlm[1] = Y_DR * r_hat[1] + dylm[1] * R_over_r; + grad_phi_nlm[2] = Y_DR * r_hat[2] + dylm[2] * R_over_r; + // real-part multiplication only + f_ji[0] += w.real_part_product(grad_phi_nlm[0]); + f_ji[1] += w.real_part_product(grad_phi_nlm[1]); + f_ji[2] += w.real_part_product(grad_phi_nlm[2]); + } + + plm_idx2 = plm_idx1; + dplm_idx2 = dplm_idx1; + + 
plm_idx1 = plm_idx; + dplm_idx1 = dplm_idx; + + idx_sph++; + } + + plm_idx = plm_idx1 = plm_idx2 = 0.0; + dplm_idx = dplm_idx1 = dplm_idx2 = 0.0; + + // m = 1 + for (int l = 1; l <= lmax; l++) { + // const int idx = l * (l + 1) + 1; // (l, 1) + + if (l == 1) { + // l=1, m=1 + plm_idx = -sq3o2 * Y00; + dplm_idx = 0.0; + } else if (l == 2) { + const double t = dl(l) * plm_idx1; + plm_idx = t * rz; + dplm_idx = t; + } else { + plm_idx = alm(idx_sph) * (rz * plm_idx1 + blm(idx_sph) * plm_idx2); + dplm_idx = alm(idx_sph) * (plm_idx1 + rz * dplm_idx1 + blm(idx_sph) * dplm_idx2); + } + + ylm = phase * plm_idx; + + dyx.re = plm_idx; + dyx.im = 0.0; + dyy.re = 0.0; + dyy.im = plm_idx; + dyz.re = phase.re * dplm_idx; + dyz.im = phase.im * dplm_idx; + + rdy.re = rx * dyx.re + +rz * dyz.re; + rdy.im = ry * dyy.im + rz * dyz.im; + + dylm[0].re = dyx.re - rdy.re * rx; + dylm[0].im = -rdy.im * rx; + dylm[1].re = -rdy.re * ry; + dylm[1].im = dyy.im - rdy.im * ry; + dylm[2].re = dyz.re - rdy.re * rz; + dylm[2].im = dyz.im - rdy.im * rz; + + for (int n = 0; n < nradmax; n++) { + + const double R_over_r = fr(ii, jj, l, n) * rinv; + const double DR = dfr(ii, jj, l, n); + const complex Y_DR = ylm * DR; + + complex w = weights(ii, mu_j, idx_sph, n); + if (w.re == 0.0 && w.im == 0.0) continue; + // counting for -m cases if m > 0 + w.re *= 2.0; + w.im *= 2.0; + + complex grad_phi_nlm[3]; + grad_phi_nlm[0] = Y_DR * r_hat[0] + dylm[0] * R_over_r; + grad_phi_nlm[1] = Y_DR * r_hat[1] + dylm[1] * R_over_r; + grad_phi_nlm[2] = Y_DR * r_hat[2] + dylm[2] * R_over_r; + // real-part multiplication only + f_ji[0] += w.real_part_product(grad_phi_nlm[0]); + f_ji[1] += w.real_part_product(grad_phi_nlm[1]); + f_ji[2] += w.real_part_product(grad_phi_nlm[2]); + } + + plm_idx2 = plm_idx1; + dplm_idx2 = dplm_idx1; + + plm_idx1 = plm_idx; + dplm_idx1 = dplm_idx; + + idx_sph++; + } + + plm_idx = plm_idx1 = plm_idx2 = 0.0; + dplm_idx = dplm_idx1 = dplm_idx2 = 0.0; + + double plm_mm1_mm1 = -sq3o2 * Y00; // (1, 
1) + + // m > 1 + phasem = phase; + for (int m = 2; m <= lmax; m++) { + + mphasem1.re = phasem.re * double(m); + mphasem1.im = phasem.im * double(m); + phasem = phasem * phase; + + for (int l = m; l <= lmax; l++) { + // const int idx = l * (l + 1) + m; + + if (l == m) { + plm_idx = cl(l) * plm_mm1_mm1; // (m+1, m) + dplm_idx = 0.0; + plm_mm1_mm1 = plm_idx; + } else if (l == (m + 1)) { + const double t = dl(l) * plm_mm1_mm1; // (m - 1, m - 1) + plm_idx = t * rz; // (m, m) + dplm_idx = t; + } else { + plm_idx = alm(idx_sph) * (rz * plm_idx1 + blm(idx_sph) * plm_idx2); + dplm_idx = alm(idx_sph) * (plm_idx1 + rz * dplm_idx1 + blm(idx_sph) * dplm_idx2); + } + + ylm.re = phasem.re * plm_idx; + ylm.im = phasem.im * plm_idx; + + dyx = mphasem1 * plm_idx; + dyy.re = -dyx.im; + dyy.im = dyx.re; + dyz = phasem * dplm_idx; + + rdy.re = rx * dyx.re + ry * dyy.re + rz * dyz.re; + rdy.im = rx * dyx.im + ry * dyy.im + rz * dyz.im; + + dylm[0].re = dyx.re - rdy.re * rx; + dylm[0].im = dyx.im - rdy.im * rx; + dylm[1].re = dyy.re - rdy.re * ry; + dylm[1].im = dyy.im - rdy.im * ry; + dylm[2].re = dyz.re - rdy.re * rz; + dylm[2].im = dyz.im - rdy.im * rz; + + for (int n = 0; n < nradmax; n++) { + + const double R_over_r = fr(ii, jj, l, n) * rinv; + const double DR = dfr(ii, jj, l, n); + const complex Y_DR = ylm * DR; + + complex w = weights(ii, mu_j, idx_sph, n); if (w.re == 0.0 && w.im == 0.0) continue; // counting for -m cases if m > 0 - if (m > 0) { - w.re *= 2.0; - w.im *= 2.0; - } - - complex DY[3]; - DY[0] = dylm(ii, jj, idx, 0); - DY[1] = dylm(ii, jj, idx, 1); - DY[2] = dylm(ii, jj, idx, 2); - const complex Y_DR = ylm(ii, jj, idx) * DR; + w.re *= 2.0; + w.im *= 2.0; complex grad_phi_nlm[3]; - grad_phi_nlm[0] = Y_DR * r_hat[0] + DY[0] * R_over_r; - grad_phi_nlm[1] = Y_DR * r_hat[1] + DY[1] * R_over_r; - grad_phi_nlm[2] = Y_DR * r_hat[2] + DY[2] * R_over_r; + grad_phi_nlm[0] = Y_DR * r_hat[0] + dylm[0] * R_over_r; + grad_phi_nlm[1] = Y_DR * r_hat[1] + dylm[1] * R_over_r; + 
grad_phi_nlm[2] = Y_DR * r_hat[2] + dylm[2] * R_over_r; // real-part multiplication only f_ji[0] += w.real_part_product(grad_phi_nlm[0]); f_ji[1] += w.real_part_product(grad_phi_nlm[1]); f_ji[2] += w.real_part_product(grad_phi_nlm[2]); } + + plm_idx2 = plm_idx1; + dplm_idx2 = dplm_idx1; + + plm_idx1 = plm_idx; + dplm_idx1 = dplm_idx; + + idx_sph++; } } @@ -1238,10 +1546,10 @@ void PairPACEKokkos::operator() (TagPairPACEComputeDerivative, const if (is_zbl) { if (jj==d_jj_min(ii)) { - // DCRU = 1.0 - f_ij(ii, jj, 0) += dF_dfcut(ii) * r_hat[0]; - f_ij(ii, jj, 1) += dF_dfcut(ii) * r_hat[1]; - f_ij(ii, jj, 2) += dF_dfcut(ii) * r_hat[2]; + // DCRU = 1.0 + f_ij(ii, jj, 0) += dF_dfcut(ii) * r_hat[0]; + f_ij(ii, jj, 1) += dF_dfcut(ii) * r_hat[1]; + f_ij(ii, jj, 2) += dF_dfcut(ii) * r_hat[2]; } } } @@ -1364,31 +1672,46 @@ void PairPACEKokkos::v_tally_xyz(EV_FLOAT &ev, const int &i, const i template void PairPACEKokkos::pre_compute_harmonics(int lmax) { + auto h_idx_sph = Kokkos::create_mirror_view(d_idx_sph); auto h_alm = Kokkos::create_mirror_view(alm); auto h_blm = Kokkos::create_mirror_view(blm); auto h_cl = Kokkos::create_mirror_view(cl); auto h_dl = Kokkos::create_mirror_view(dl); - for (int l = 1; l <= lmax; l++) { - const double lsq = l * l; - const double ld = 2 * l; - const double l1 = (4 * lsq - 1); - const double l2 = lsq - ld + 1; - for (int m = 0; m < l - 1; m++) { - const double msq = m * m; - const double a = sqrt((double(l1)) / (double(lsq - msq))); - const double b = -sqrt((double(l2 - msq)) / (double(4 * l2 - 1))); + Kokkos::deep_copy(h_idx_sph,-1); + + int idx_sph = 0; + for (int m = 0; m <= lmax; m++) { + const double msq = m * m; + for (int l = m; l <= lmax; l++) { const int idx = l * (l + 1) + m; // (l, m) - h_alm(idx) = a; - h_blm(idx) = b; + h_idx_sph(idx) = idx_sph; + + double a = 0.0; + double b = 0.0; + + if (l > 1 && l != m) { + const double lsq = l * l; + const double ld = 2 * l; + const double l1 = (4 * lsq - 1); + const double l2 = lsq - ld + 
1; + + a = sqrt((double(l1)) / (double(lsq - msq))); + b = -sqrt((double(l2 - msq)) / (double(4 * l2 - 1))); + } + h_alm(idx_sph) = a; + h_blm(idx_sph) = b; + idx_sph++; } } + idx_sph_max = idx_sph; for (int l = 1; l <= lmax; l++) { h_cl(l) = -sqrt(1.0 + 0.5 / (double(l))); h_dl(l) = sqrt(double(2 * (l - 1) + 3)); } + Kokkos::deep_copy(d_idx_sph, h_idx_sph); Kokkos::deep_copy(alm, h_alm); Kokkos::deep_copy(blm, h_blm); Kokkos::deep_copy(cl, h_cl); @@ -1397,143 +1720,6 @@ void PairPACEKokkos::pre_compute_harmonics(int lmax) /* ---------------------------------------------------------------------- */ -template -KOKKOS_INLINE_FUNCTION -void PairPACEKokkos::compute_barplm(int ii, int jj, double rz, int lmax) const -{ - // requires -1 <= rz <= 1 , NO CHECKING IS PERFORMED !!!!!!!!! - // prefactors include 1/sqrt(2) factor compared to reference - - // l=0, m=0 - // plm(ii, jj, 0, 0) = Y00/sq1o4pi; //= sq1o4pi; - plm(ii, jj, 0) = Y00; //= 1; - dplm(ii, jj, 0) = 0.0; - - if (lmax > 0) { - - // l=1, m=0 - plm(ii, jj, 2) = Y00 * sq3 * rz; - dplm(ii, jj, 2) = Y00 * sq3; - - // l=1, m=1 - plm(ii, jj, 3) = -sq3o2 * Y00; - dplm(ii, jj, 3) = 0.0; - - // loop l = 2, lmax - for (int l = 2; l <= lmax; l++) { - for (int m = 0; m < l - 1; m++) { - const int idx = l * (l + 1) + m; // (l, m) - const int idx1 = (l - 1) * l + m; // (l - 1, m) - const int idx2 = (l - 2) * (l - 1) + m; // (l - 2, m) - plm(ii, jj, idx) = alm(idx) * (rz * plm(ii, jj, idx1) + blm(idx) * plm(ii, jj, idx2)); - dplm(ii, jj, idx) = alm(idx) * (plm(ii, jj, idx1) + rz * dplm(ii, jj, idx1) + blm(idx) * dplm(ii, jj, idx2)); - } - const int idx = l * (l + 1) + l; // (l, l) - const int idx1 = l * (l + 1) + l - 1; // (l, l - 1) - const int idx2 = (l - 1) * l + l - 1; // (l - 1, l - 1) - const double t = dl(l) * plm(ii, jj, idx2); - plm(ii, jj, idx1) = t * rz; - dplm(ii, jj, idx1) = t; - plm(ii, jj, idx) = cl(l) * plm(ii, jj, idx2); - dplm(ii, jj, idx) = 0.0; - } - } -} - -/* 
---------------------------------------------------------------------- */ - -template -KOKKOS_INLINE_FUNCTION -void PairPACEKokkos::compute_ylm(int ii, int jj, double rx, double ry, double rz, int lmax) const -{ - // requires rx^2 + ry^2 + rz^2 = 1 , NO CHECKING IS PERFORMED !!!!!!!!! - - complex phase; - complex phasem, mphasem1; - complex dyx, dyy, dyz; - complex rdy; - - phase.re = rx; - phase.im = ry; - - // compute barplm - compute_barplm(ii, jj, rz, lmax); - - // m = 0 - for (int l = 0; l <= lmax; l++) { - const int idx = l * (l + 1); - - ylm(ii, jj, idx).re = plm(ii, jj, idx); - ylm(ii, jj, idx).im = 0.0; - - dyz.re = dplm(ii, jj, idx); - rdy.re = dyz.re * rz; - - dylm(ii, jj, idx, 0).re = -rdy.re * rx; - dylm(ii, jj, idx, 0).im = 0.0; - dylm(ii, jj, idx, 1).re = -rdy.re * ry; - dylm(ii, jj, idx, 1).im = 0.0; - dylm(ii, jj, idx, 2).re = dyz.re - rdy.re * rz; - dylm(ii, jj, idx, 2).im = 0; - } - // m = 1 - for (int l = 1; l <= lmax; l++) { - const int idx = l * (l + 1) + 1; - - ylm(ii, jj, idx) = phase * plm(ii, jj, idx); - - dyx.re = plm(ii, jj, idx); - dyx.im = 0.0; - dyy.re = 0.0; - dyy.im = plm(ii, jj, idx); - dyz.re = phase.re * dplm(ii, jj, idx); - dyz.im = phase.im * dplm(ii, jj, idx); - - rdy.re = rx * dyx.re + +rz * dyz.re; - rdy.im = ry * dyy.im + rz * dyz.im; - - dylm(ii, jj, idx, 0).re = dyx.re - rdy.re * rx; - dylm(ii, jj, idx, 0).im = -rdy.im * rx; - dylm(ii, jj, idx, 1).re = -rdy.re * ry; - dylm(ii, jj, idx, 1).im = dyy.im - rdy.im * ry; - dylm(ii, jj, idx, 2).re = dyz.re - rdy.re * rz; - dylm(ii, jj, idx, 2).im = dyz.im - rdy.im * rz; - } - - // m > 1 - phasem = phase; - for (int m = 2; m <= lmax; m++) { - - mphasem1.re = phasem.re * double(m); - mphasem1.im = phasem.im * double(m); - phasem = phasem * phase; - - for (int l = m; l <= lmax; l++) { - const int idx = l * (l + 1) + m; - - ylm(ii, jj, idx).re = phasem.re * plm(ii, jj, idx); - ylm(ii, jj, idx).im = phasem.im * plm(ii, jj, idx); - - dyx = mphasem1 * plm(ii, jj, idx); - dyy.re = 
-dyx.im; - dyy.im = dyx.re; - dyz = phasem * dplm(ii, jj, idx); - - rdy.re = rx * dyx.re + ry * dyy.re + rz * dyz.re; - rdy.im = rx * dyx.im + ry * dyy.im + rz * dyz.im; - - dylm(ii, jj, idx, 0).re = dyx.re - rdy.re * rx; - dylm(ii, jj, idx, 0).im = dyx.im - rdy.im * rx; - dylm(ii, jj, idx, 1).re = dyy.re - rdy.re * ry; - dylm(ii, jj, idx, 1).im = dyy.im - rdy.im * ry; - dylm(ii, jj, idx, 2).re = dyz.re - rdy.re * rz; - dylm(ii, jj, idx, 2).im = dyz.im - rdy.im * rz; - } - } -} - -/* ---------------------------------------------------------------------- */ - template KOKKOS_INLINE_FUNCTION void PairPACEKokkos::cutoff_func_poly(const double r, const double r_in, const double delta_in, double &fc, double &dfc) const @@ -1662,11 +1848,11 @@ void PairPACEKokkos::evaluate_splines(const int ii, const int jj, do spline_gk.calcSplines(ii, jj, r, gr, dgr); spline_rnl.calcSplines(ii, jj, r, d_values, d_derivatives); - for (int kk = 0; kk < (int)fr.extent(2); kk++) { - for (int ll = 0; ll < (int)fr.extent(3); ll++) { - const int flatten = kk*fr.extent(3) + ll; - fr(ii, jj, kk, ll) = d_values(ii, jj, flatten); - dfr(ii, jj, kk, ll) = d_derivatives(ii, jj, flatten); + for (int ll = 0; ll < (int)fr.extent(2); ll++) { + for (int kk = 0; kk < (int)fr.extent(3); kk++) { + const int flatten = kk*fr.extent(2) + ll; + fr(ii, jj, ll, kk) = d_values(ii, jj, flatten); + dfr(ii, jj, ll, kk) = d_derivatives(ii, jj, flatten); } } @@ -1686,7 +1872,7 @@ void PairPACEKokkos::SplineInterpolatorKokkos::operator=(const Splin rscalelookup = spline.rscalelookup; num_of_functions = spline.num_of_functions; - lookupTable = t_ace_3d4("lookupTable", ntot+1, num_of_functions); + lookupTable = t_ace_3d4_lr("lookupTable", ntot+1, num_of_functions); auto h_lookupTable = Kokkos::create_mirror_view(lookupTable); for (int i = 0; i < ntot+1; i++) for (int j = 0; j < num_of_functions; j++) @@ -1792,10 +1978,6 @@ double PairPACEKokkos::memory_usage() bytes += MemKK::memory_usage(d_derivatives); bytes += 
MemKK::memory_usage(cr); bytes += MemKK::memory_usage(dcr); - bytes += MemKK::memory_usage(plm); - bytes += MemKK::memory_usage(dplm); - bytes += MemKK::memory_usage(ylm); - bytes += MemKK::memory_usage(dylm); bytes += MemKK::memory_usage(d_ncount); bytes += MemKK::memory_usage(d_mu); bytes += MemKK::memory_usage(d_rhats); @@ -1811,10 +1993,10 @@ double PairPACEKokkos::memory_usage() bytes += MemKK::memory_usage(d_npoti); bytes += MemKK::memory_usage(d_wpre); bytes += MemKK::memory_usage(d_mexp); - bytes += MemKK::memory_usage(d_idx_rho_count); + bytes += MemKK::memory_usage(d_idx_ms_combs_count); bytes += MemKK::memory_usage(d_rank); bytes += MemKK::memory_usage(d_num_ms_combs); - bytes += MemKK::memory_usage(d_offsets); + bytes += MemKK::memory_usage(d_idx_funcs); bytes += MemKK::memory_usage(d_mus); bytes += MemKK::memory_usage(d_ns); bytes += MemKK::memory_usage(d_ls); diff --git a/src/KOKKOS/pair_pace_kokkos.h b/src/KOKKOS/pair_pace_kokkos.h index 36486f8628..e22c61f0ea 100644 --- a/src/KOKKOS/pair_pace_kokkos.h +++ b/src/KOKKOS/pair_pace_kokkos.h @@ -36,7 +36,6 @@ class PairPACEKokkos : public PairPACE { public: struct TagPairPACEComputeNeigh{}; struct TagPairPACEComputeRadial{}; - struct TagPairPACEComputeYlm{}; struct TagPairPACEComputeAi{}; struct TagPairPACEConjugateAi{}; struct TagPairPACEComputeRho{}; @@ -66,9 +65,6 @@ class PairPACEKokkos : public PairPACE { KOKKOS_INLINE_FUNCTION void operator() (TagPairPACEComputeRadial,const typename Kokkos::TeamPolicy::member_type& team) const; - KOKKOS_INLINE_FUNCTION - void operator() (TagPairPACEComputeYlm,const typename Kokkos::TeamPolicy::member_type& team) const; - KOKKOS_INLINE_FUNCTION void operator() (TagPairPACEComputeAi,const typename Kokkos::TeamPolicy::member_type& team) const; @@ -96,7 +92,7 @@ class PairPACEKokkos : public PairPACE { void operator() (TagPairPACEComputeForce,const int& ii, EV_FLOAT&) const; protected: - int inum, maxneigh, chunk_size, chunk_offset, idx_rho_max; + int inum, maxneigh, 
chunk_size, chunk_offset, idx_ms_combs_max, idx_sph_max; int host_flag; int eflag, vflag; @@ -157,12 +153,6 @@ class PairPACEKokkos : public PairPACE { const F_FLOAT &fx, const F_FLOAT &fy, const F_FLOAT &fz, const F_FLOAT &delx, const F_FLOAT &dely, const F_FLOAT &delz) const; - KOKKOS_INLINE_FUNCTION - void compute_barplm(int, int, double, int) const; - - KOKKOS_INLINE_FUNCTION - void compute_ylm(int, int, double, double, double, int) const; - KOKKOS_INLINE_FUNCTION void cutoff_func_poly(const double, const double, const double, double &, double &) const; @@ -194,14 +184,18 @@ class PairPACEKokkos : public PairPACE { typedef Kokkos::View t_ace_1i; typedef Kokkos::View t_ace_2i; + typedef Kokkos::View t_ace_2i_lr; typedef Kokkos::View t_ace_3i; + typedef Kokkos::View t_ace_3i_lr; typedef Kokkos::View t_ace_4i; typedef Kokkos::View t_ace_1d; typedef Kokkos::View t_ace_2d; + typedef Kokkos::View t_ace_2d_lr; typedef Kokkos::View t_ace_2d3; typedef Kokkos::View t_ace_3d; typedef Kokkos::View t_ace_3d3; typedef Kokkos::View t_ace_3d4; + typedef Kokkos::View t_ace_3d4_lr; typedef Kokkos::View t_ace_4d; typedef Kokkos::View t_ace_1c; typedef Kokkos::View t_ace_2c; @@ -248,23 +242,13 @@ class PairPACEKokkos : public PairPACE { void pre_compute_harmonics(int); - KOKKOS_INLINE_FUNCTION - void compute_barplm(double rz, int lmaxi); - - KOKKOS_INLINE_FUNCTION - void compute_ylm(double rx, double ry, double rz, int lmaxi); - + t_ace_4c A_sph; + t_ace_1d d_idx_sph; t_ace_1d alm; t_ace_1d blm; t_ace_1d cl; t_ace_1d dl; - t_ace_3d plm; - t_ace_3d dplm; - - t_ace_3c ylm; - t_ace_4c3 dylm; - // short neigh list t_ace_1i d_ncount; t_ace_2d d_mu; @@ -283,18 +267,18 @@ class PairPACEKokkos : public PairPACE { t_ace_1d d_rho_core_cutoff; t_ace_1d d_drho_core_cutoff; t_ace_1d d_E0vals; - t_ace_2d d_wpre; - t_ace_2d d_mexp; + t_ace_2d_lr d_wpre; + t_ace_2d_lr d_mexp; // tilde - t_ace_1i d_idx_rho_count; - t_ace_2i d_rank; - t_ace_2i d_num_ms_combs; - t_ace_2i d_offsets; - t_ace_3i d_mus; 
- t_ace_3i d_ns; - t_ace_3i d_ls; - t_ace_3i d_ms_combs; + t_ace_1i d_idx_ms_combs_count; + t_ace_2i_lr d_rank; + t_ace_2i_lr d_num_ms_combs; + t_ace_2i_lr d_idx_funcs; + t_ace_3i_lr d_mus; + t_ace_3i_lr d_ns; + t_ace_3i_lr d_ls; + t_ace_3i_lr d_ms_combs; t_ace_3d d_ctildes; t_ace_3d3 f_ij; @@ -304,12 +288,12 @@ class PairPACEKokkos : public PairPACE { int ntot, nlut, num_of_functions; double cutoff, deltaSplineBins, invrscalelookup, rscalelookup; - t_ace_3d4 lookupTable; + t_ace_3d4_lr lookupTable; void operator=(const SplineInterpolator &spline); void deallocate() { - lookupTable = t_ace_3d4(); + lookupTable = t_ace_3d4_lr(); } double memory_usage() { diff --git a/src/KOKKOS/pair_reaxff_kokkos.cpp b/src/KOKKOS/pair_reaxff_kokkos.cpp index 11a40970c2..505681acb3 100644 --- a/src/KOKKOS/pair_reaxff_kokkos.cpp +++ b/src/KOKKOS/pair_reaxff_kokkos.cpp @@ -1598,7 +1598,6 @@ void PairReaxFFKokkos::operator()(TagPairReaxBuildListsHalfBlocking< F_FLOAT dDeltap_self_i[3] = {0.0,0.0,0.0}; F_FLOAT total_bo_i = 0.0; - int j_index,i_index; d_bo_first[i] = i*maxbo; const int bo_first_i = d_bo_first[i]; @@ -1675,7 +1674,7 @@ void PairReaxFFKokkos::operator()(TagPairReaxBuildListsHalfBlocking< int ii_index = -1; int jj_index = -1; - if (build_bo_list(bo_first_i, i, j, i_index, j_index, ii_index, jj_index)) { + if (build_bo_list(bo_first_i, i, j, ii_index, jj_index)) { // from BondOrder1 @@ -1743,7 +1742,6 @@ void PairReaxFFKokkos::operator()(TagPairReaxBuildListsHalfBlockingP F_FLOAT C12, C34, C56, BO_s, BO_pi, BO_pi2, BO, delij[3]; - int j_index,i_index; d_bo_first[i] = i*maxbo; const int bo_first_i = d_bo_first[i]; @@ -1821,7 +1819,7 @@ void PairReaxFFKokkos::operator()(TagPairReaxBuildListsHalfBlockingP int ii_index = -1; int jj_index = -1; - build_bo_list(bo_first_i, i, j, i_index, j_index, ii_index, jj_index); + build_bo_list(bo_first_i, i, j, ii_index, jj_index); } } } @@ -1842,7 +1840,6 @@ void 
PairReaxFFKokkos::operator()(TagPairReaxBuildListsHalfPreview::operator()(TagPairReaxBuildListsHalfPreview(bo_first_i, i, j, i_index, j_index, ii_index, jj_index); + build_bo_list(bo_first_i, i, j, ii_index, jj_index); } } @@ -1942,7 +1939,8 @@ void PairReaxFFKokkos::build_hb_list(F_FLOAT rsq, int i, int hb_firs template template KOKKOS_INLINE_FUNCTION -bool PairReaxFFKokkos::build_bo_list(int bo_first_i, int i, int j, int i_index, int j_index, int& ii_index, int& jj_index) const { +bool PairReaxFFKokkos::build_bo_list(int bo_first_i, int i, int j, int& ii_index, int& jj_index) const { + int i_index, j_index; if (NEIGHFLAG == HALF) { j_index = bo_first_i + d_bo_num[i]; @@ -2509,8 +2507,6 @@ void PairReaxFFKokkos::compute_angular_sbo(int i, int itype, int j_s F_FLOAT prod_SBO = 1.0; for (int jj = j_start; jj < j_end; jj++) { - int j = d_bo_list[jj]; - j &= NEIGHMASK; const int j_index = jj - j_start; const F_FLOAT bo_ij = d_BO(i,j_index); @@ -2919,8 +2915,6 @@ void PairReaxFFKokkos::operator()(TagPairReaxComputeAngularPreproces a_CdDelta[k] += CEcoa5; for (int ll = j_start; ll < j_end; ll++) { - int l = d_bo_list[ll]; - l &= NEIGHMASK; const int l_index = ll - j_start; temp_bo_jt = d_BO(i,l_index); diff --git a/src/KOKKOS/pair_reaxff_kokkos.h b/src/KOKKOS/pair_reaxff_kokkos.h index fba7c03ec4..5f228ebd19 100644 --- a/src/KOKKOS/pair_reaxff_kokkos.h +++ b/src/KOKKOS/pair_reaxff_kokkos.h @@ -185,7 +185,7 @@ class PairReaxFFKokkos : public PairReaxFF { // Returns if we need to populate d_d* functions or not template KOKKOS_INLINE_FUNCTION - bool build_bo_list(int, int, int, int, int, int&, int&) const; + bool build_bo_list(int, int, int, int&, int&) const; KOKKOS_INLINE_FUNCTION void operator()(TagPairReaxBuildListsFull, const int&) const; @@ -526,7 +526,7 @@ struct PairReaxKokkosFindBondFunctor { typedef int value_type; int groupbit; PairReaxFFKokkos c; - PairReaxKokkosFindBondFunctor(PairReaxFFKokkos* c_ptr, int groupbit):c(*c_ptr),groupbit(groupbit) {}; + 
PairReaxKokkosFindBondFunctor(PairReaxFFKokkos* c_ptr, int groupbit):groupbit(groupbit),c(*c_ptr){}; KOKKOS_INLINE_FUNCTION void join(int &dst, diff --git a/src/KOKKOS/pppm_kokkos.cpp b/src/KOKKOS/pppm_kokkos.cpp index 2a53682df3..36d5974c6d 100644 --- a/src/KOKKOS/pppm_kokkos.cpp +++ b/src/KOKKOS/pppm_kokkos.cpp @@ -1371,8 +1371,6 @@ void PPPMKokkos::operator()(TagPPPM_brick2fft, const int &ii) const template void PPPMKokkos::poisson_ik() { - int j; - // transform charge density (r -> k) copymode = 1; @@ -1383,7 +1381,8 @@ void PPPMKokkos::poisson_ik() // global energy and virial contribution - scaleinv = 1.0/(nx_pppm*ny_pppm*nz_pppm); + bigint ngridtotal = (bigint) nx_pppm * ny_pppm * nz_pppm; + scaleinv = 1.0/ngridtotal; s2 = scaleinv*scaleinv; if (eflag_global || vflag_global) { @@ -1392,7 +1391,7 @@ void PPPMKokkos::poisson_ik() copymode = 1; Kokkos::parallel_reduce(Kokkos::RangePolicy(0,nfft),*this,ev); copymode = 0; - for (j = 0; j < 6; j++) virial[j] += ev.v[j]; + for (int j = 0; j < 6; j++) virial[j] += ev.v[j]; energy += ev.ecoul; } else { copymode = 1; diff --git a/src/KOKKOS/third_order_kokkos.cpp b/src/KOKKOS/third_order_kokkos.cpp index 6208aa966a..04c467777f 100644 --- a/src/KOKKOS/third_order_kokkos.cpp +++ b/src/KOKKOS/third_order_kokkos.cpp @@ -174,72 +174,45 @@ void ThirdOrderKokkos::update_force() } bool execute_on_host = false; - unsigned int datamask_read_device = 0; - unsigned int datamask_modify_device = 0; unsigned int datamask_read_host = 0; if (pair_compute_flag) { if (force->pair->execution_space==Host) { execute_on_host = true; datamask_read_host |= force->pair->datamask_read; - datamask_modify_device |= force->pair->datamask_modify; - } else { - datamask_read_device |= force->pair->datamask_read; - datamask_modify_device |= force->pair->datamask_modify; } } if (atomKK->molecular && force->bond) { if (force->bond->execution_space==Host) { execute_on_host = true; datamask_read_host |= force->bond->datamask_read; - datamask_modify_device 
|= force->bond->datamask_modify; - } else { - datamask_read_device |= force->bond->datamask_read; - datamask_modify_device |= force->bond->datamask_modify; } } if (atomKK->molecular && force->angle) { if (force->angle->execution_space==Host) { execute_on_host = true; datamask_read_host |= force->angle->datamask_read; - datamask_modify_device |= force->angle->datamask_modify; - } else { - datamask_read_device |= force->angle->datamask_read; - datamask_modify_device |= force->angle->datamask_modify; } } if (atomKK->molecular && force->dihedral) { if (force->dihedral->execution_space==Host) { execute_on_host = true; datamask_read_host |= force->dihedral->datamask_read; - datamask_modify_device |= force->dihedral->datamask_modify; - } else { - datamask_read_device |= force->dihedral->datamask_read; - datamask_modify_device |= force->dihedral->datamask_modify; } } if (atomKK->molecular && force->improper) { if (force->improper->execution_space==Host) { execute_on_host = true; datamask_read_host |= force->improper->datamask_read; - datamask_modify_device |= force->improper->datamask_modify; - } else { - datamask_read_device |= force->improper->datamask_read; - datamask_modify_device |= force->improper->datamask_modify; } } if (kspace_compute_flag) { if (force->kspace->execution_space==Host) { execute_on_host = true; datamask_read_host |= force->kspace->datamask_read; - datamask_modify_device |= force->kspace->datamask_modify; - } else { - datamask_read_device |= force->kspace->datamask_read; - datamask_modify_device |= force->kspace->datamask_modify; } } - if (pair_compute_flag) { atomKK->sync(force->pair->execution_space,force->pair->datamask_read); atomKK->sync(force->pair->execution_space,~(~force->pair->datamask_read|(F_MASK | ENERGY_MASK | VIRIAL_MASK))); diff --git a/src/KOKKOS/transpose_helper_kokkos.h b/src/KOKKOS/transpose_helper_kokkos.h index e3a4d86f9a..06af0aea91 100644 --- a/src/KOKKOS/transpose_helper_kokkos.h +++ b/src/KOKKOS/transpose_helper_kokkos.h @@ 
-125,8 +125,7 @@ struct TransposeHelperKokkos { elem[0] = extent_tile_id[0] * tile_size; elem[1] = extent_tile_id[1] * tile_size; - if (elem[0] >= d_dst.extent(0) || - elem[1] >= d_dst.extent(1)) return; + if ((elem[0] >= (int)d_dst.extent(0)) || (elem[1] >= (int)d_dst.extent(1))) return; // determine if a row/column is a full `tile_size` in size or not bool perfect_pad[2]; @@ -135,35 +134,30 @@ struct TransposeHelperKokkos { // load phase if (src_is_layout_right) { - Kokkos::parallel_for(Kokkos::ThreadVectorRange(team_member, tile_size), - [&] (const int j) { - - if (elem[1] + j < d_src.extent(1)) { - if (perfect_pad[0]) { - for (int i = 0; i < tile_size; i++) - buffer[i * (tile_size + bank_pad) + j] = d_src(elem[0] + i, elem[1] + j); - } else { - for (int i = 0; i < (d_src.extent(0) - elem[0]); i++) - buffer[i * (tile_size + bank_pad) + j] = d_src(elem[0] + i, elem[1] + j); + Kokkos::parallel_for(Kokkos::ThreadVectorRange(team_member, tile_size), [&] (const int j) { + if (elem[1] + j < (int)d_src.extent(1)) { + if (perfect_pad[0]) { + for (int i = 0; i < tile_size; i++) + buffer[i * (tile_size + bank_pad) + j] = d_src(elem[0] + i, elem[1] + j); + } else { + for (int i = 0; i < ((int)d_src.extent(0) - elem[0]); i++) + buffer[i * (tile_size + bank_pad) + j] = d_src(elem[0] + i, elem[1] + j); + } } - } - }); - + }); } else { // src is layout left - Kokkos::parallel_for(Kokkos::ThreadVectorRange(team_member, tile_size), - [&] (const int i) { - - if (elem[0] + i < d_src.extent(0)) { - if (perfect_pad[1]) { - for (int j = 0; j < tile_size; j++) - buffer[i * (tile_size + bank_pad) + j] = d_src(elem[0] + i, elem[1] + j); - } else { - for (int j = 0; j < (d_src.extent(1) - elem[1]); j++) - buffer[i * (tile_size + bank_pad) + j] = d_src(elem[0] + i, elem[1] + j); + Kokkos::parallel_for(Kokkos::ThreadVectorRange(team_member, tile_size), [&] (const int i) { + if (elem[0] + i < (int)d_src.extent(0)) { + if (perfect_pad[1]) { + for (int j = 0; j < tile_size; j++) + buffer[i * 
(tile_size + bank_pad) + j] = d_src(elem[0] + i, elem[1] + j); + } else { + for (int j = 0; j < ((int)d_src.extent(1) - elem[1]); j++) + buffer[i * (tile_size + bank_pad) + j] = d_src(elem[0] + i, elem[1] + j); + } } - } - }); + }); } // No need for an extra sync b/c there is an implicit sync at the end @@ -171,37 +165,31 @@ struct TransposeHelperKokkos { // save phase if (src_is_layout_right) { - Kokkos::parallel_for(Kokkos::ThreadVectorRange(team_member, tile_size), - [&] (const int i) { - - if (elem[0] + i < d_dst.extent(0)) { - if (perfect_pad[1]) { - for (int j = 0; j < tile_size; j++) - d_dst(elem[0] + i, elem[1] + j) = buffer[i * (tile_size + bank_pad) + j]; - } else { - for (int j = 0; j < (d_dst.extent(1) - elem[1]); j++) - d_dst(elem[0] + i, elem[1] + j) = buffer[i * (tile_size + bank_pad) + j]; + Kokkos::parallel_for(Kokkos::ThreadVectorRange(team_member, tile_size), [&] (const int i) { + if (elem[0] + i < (int)d_dst.extent(0)) { + if (perfect_pad[1]) { + for (int j = 0; j < tile_size; j++) + d_dst(elem[0] + i, elem[1] + j) = buffer[i * (tile_size + bank_pad) + j]; + } else { + for (int j = 0; j < ((int)d_dst.extent(1) - elem[1]); j++) + d_dst(elem[0] + i, elem[1] + j) = buffer[i * (tile_size + bank_pad) + j]; + } } - } - }); + }); } else { - // src is layout left - Kokkos::parallel_for(Kokkos::ThreadVectorRange(team_member, tile_size), - [&] (const int j) { - - if (elem[1] + j < d_dst.extent(1)) { - if (perfect_pad[0]) { - for (int i = 0; i < tile_size; i++) - d_dst(elem[0] + i, elem[1] + j) = buffer[i * (tile_size + bank_pad) + j]; - } else { - for (int i = 0; i < (d_dst.extent(0) - elem[0]); i++) - d_dst(elem[0] + i, elem[1] + j) = buffer[i * (tile_size + bank_pad) + j]; + Kokkos::parallel_for(Kokkos::ThreadVectorRange(team_member, tile_size), [&] (const int j) { + if (elem[1] + j < (int)d_dst.extent(1)) { + if (perfect_pad[0]) { + for (int i = 0; i < tile_size; i++) + d_dst(elem[0] + i, elem[1] + j) = buffer[i * (tile_size + bank_pad) + j]; + } else 
{ + for (int i = 0; i < ((int)d_dst.extent(0) - elem[0]); i++) + d_dst(elem[0] + i, elem[1] + j) = buffer[i * (tile_size + bank_pad) + j]; + } } - } - }); + }); } - } }; diff --git a/src/KSPACE/pair_lj_charmmfsw_coul_long.cpp b/src/KSPACE/pair_lj_charmmfsw_coul_long.cpp index b7635c49c7..260c26e8aa 100644 --- a/src/KSPACE/pair_lj_charmmfsw_coul_long.cpp +++ b/src/KSPACE/pair_lj_charmmfsw_coul_long.cpp @@ -76,6 +76,8 @@ PairLJCharmmfswCoulLong::PairLJCharmmfswCoulLong(LAMMPS *lmp) : Pair(lmp) PairLJCharmmfswCoulLong::~PairLJCharmmfswCoulLong() { + if (copymode) return; + // switch qqr2e back from CHARMM value to LAMMPS value if (update && strcmp(update->unit_style,"real") == 0) { @@ -85,8 +87,6 @@ PairLJCharmmfswCoulLong::~PairLJCharmmfswCoulLong() force->qqr2e = force->qqr2e_lammps_real; } - if (copymode) return; - if (allocated) { memory->destroy(setflag); memory->destroy(cutsq); diff --git a/src/KSPACE/pppm.cpp b/src/KSPACE/pppm.cpp index 2f5b4fc670..ac516ff18c 100644 --- a/src/KSPACE/pppm.cpp +++ b/src/KSPACE/pppm.cpp @@ -1188,7 +1188,7 @@ double PPPM::compute_qopt() // each proc calculates contributions from every Pth grid point bigint ngridtotal = (bigint) nx_pppm * ny_pppm * nz_pppm; - int nxy_pppm = nx_pppm * ny_pppm; + bigint nxy_pppm = (bigint) nx_pppm * ny_pppm; double qopt = 0.0; @@ -1944,7 +1944,8 @@ void PPPM::poisson_ik() // global energy and virial contribution - double scaleinv = 1.0/(nx_pppm*ny_pppm*nz_pppm); + bigint ngridtotal = (bigint) nx_pppm * ny_pppm * nz_pppm; + double scaleinv = 1.0/ngridtotal; double s2 = scaleinv*scaleinv; if (eflag_global || vflag_global) { @@ -2145,7 +2146,8 @@ void PPPM::poisson_ad() // global energy and virial contribution - double scaleinv = 1.0/(nx_pppm*ny_pppm*nz_pppm); + bigint ngridtotal = (bigint) nx_pppm * ny_pppm * nz_pppm; + double scaleinv = 1.0/ngridtotal; double s2 = scaleinv*scaleinv; if (eflag_global || vflag_global) { @@ -3259,7 +3261,8 @@ void PPPM::poisson_groups(int AA_flag) // keep everything in 
reciprocal space so // no inverse FFTs needed - double scaleinv = 1.0/(nx_pppm*ny_pppm*nz_pppm); + bigint ngridtotal = (bigint) nx_pppm * ny_pppm * nz_pppm; + double scaleinv = 1.0/ngridtotal; double s2 = scaleinv*scaleinv; // energy diff --git a/src/KSPACE/pppm_dipole.cpp b/src/KSPACE/pppm_dipole.cpp index a01ffea1dc..e0d13f2b9a 100644 --- a/src/KSPACE/pppm_dipole.cpp +++ b/src/KSPACE/pppm_dipole.cpp @@ -1338,7 +1338,8 @@ void PPPMDipole::poisson_ik_dipole() // global energy and virial contribution - double scaleinv = 1.0/(nx_pppm*ny_pppm*nz_pppm); + bigint ngridtotal = (bigint) nx_pppm * ny_pppm * nz_pppm; + double scaleinv = 1.0/ngridtotal; double s2 = scaleinv*scaleinv; if (eflag_global || vflag_global) { diff --git a/src/KSPACE/pppm_disp.cpp b/src/KSPACE/pppm_disp.cpp index 72424a7330..a738db98d2 100644 --- a/src/KSPACE/pppm_disp.cpp +++ b/src/KSPACE/pppm_disp.cpp @@ -4556,7 +4556,8 @@ void PPPMDisp::poisson_ik(FFT_SCALAR* wk1, FFT_SCALAR* wk2, // if requested, compute energy and virial contribution - double scaleinv = 1.0/(nx_p*ny_p*nz_p); + bigint ngridtotal = (bigint) nx_p * ny_p * nz_p; + double scaleinv = 1.0/ngridtotal; double s2 = scaleinv*scaleinv; if (eflag_global || vflag_global) { @@ -4696,7 +4697,8 @@ void PPPMDisp::poisson_ad(FFT_SCALAR* wk1, FFT_SCALAR* wk2, // if requested, compute energy and virial contribution - double scaleinv = 1.0/(nx_p*ny_p*nz_p); + bigint ngridtotal = (bigint) nx_p * ny_p * nz_p; + double scaleinv = 1.0/ngridtotal; double s2 = scaleinv*scaleinv; if (eflag_global || vflag_global) { @@ -4844,7 +4846,8 @@ poisson_2s_ik(FFT_SCALAR* dfft_1, FFT_SCALAR* dfft_2, int i,j,k,n; double eng; - double scaleinv = 1.0/(nx_pppm_6*ny_pppm_6*nz_pppm_6); + bigint ngridtotal = (bigint) nx_pppm_6 * ny_pppm_6 * nz_pppm_6; + double scaleinv = 1.0/ngridtotal; // transform charge/dispersion density (r -> k) // only one transform when energies and pressures not calculated @@ -5017,7 +5020,8 @@ poisson_none_ik(int n1, int n2,FFT_SCALAR* dfft_1, 
FFT_SCALAR* dfft_2, int i,j,k,n; double eng; - double scaleinv = 1.0/(nx_pppm_6*ny_pppm_6*nz_pppm_6); + bigint ngridtotal = (bigint) nx_pppm_6 * ny_pppm_6 * nz_pppm_6; + double scaleinv = 1.0/ngridtotal; // transform charge/dispersion density (r -> k) // only one transform required when energies and pressures not needed @@ -5191,7 +5195,8 @@ poisson_2s_ad(FFT_SCALAR* dfft_1, FFT_SCALAR* dfft_2, int i,j,k,n; double eng; - double scaleinv = 1.0/(nx_pppm_6*ny_pppm_6*nz_pppm_6); + bigint ngridtotal = (bigint) nx_pppm_6 * ny_pppm_6 * nz_pppm_6; + double scaleinv = 1.0/ngridtotal; // transform charge/dispersion density (r -> k) // only one tansform required when energies and pressures not needed @@ -5289,7 +5294,8 @@ poisson_none_ad(int n1, int n2, FFT_SCALAR* dfft_1, FFT_SCALAR* dfft_2, int i,j,k,n; double eng; - double scaleinv = 1.0/(nx_pppm_6*ny_pppm_6*nz_pppm_6); + bigint ngridtotal = (bigint) nx_pppm_6 * ny_pppm_6 * nz_pppm_6; + double scaleinv = 1.0/ngridtotal; // transform charge/dispersion density (r -> k) // only one tansform required when energies and pressures not needed diff --git a/src/KSPACE/pppm_stagger.cpp b/src/KSPACE/pppm_stagger.cpp index d6f3c9cac6..d44f2428c8 100644 --- a/src/KSPACE/pppm_stagger.cpp +++ b/src/KSPACE/pppm_stagger.cpp @@ -302,7 +302,7 @@ double PPPMStagger::compute_qopt() // each proc calculates contributions from every Pth grid point bigint ngridtotal = (bigint) nx_pppm * ny_pppm * nz_pppm; - int nxy_pppm = nx_pppm * ny_pppm; + bigint nxy_pppm = (bigint) nx_pppm * ny_pppm; double qopt = 0.0; @@ -398,7 +398,7 @@ double PPPMStagger::compute_qopt_ad() // each proc calculates contributions from every Pth grid point bigint ngridtotal = (bigint) nx_pppm * ny_pppm * nz_pppm; - int nxy_pppm = nx_pppm * ny_pppm; + bigint nxy_pppm = (bigint) nx_pppm * ny_pppm; double qopt = 0.0; diff --git a/src/LATBOLTZ/fix_lb_fluid.cpp b/src/LATBOLTZ/fix_lb_fluid.cpp index f692d28084..f3d8f45142 100644 --- a/src/LATBOLTZ/fix_lb_fluid.cpp +++ 
b/src/LATBOLTZ/fix_lb_fluid.cpp @@ -4430,9 +4430,9 @@ void FixLbFluid::calc_MPT(double &totalmass, double totalmomentum[3], double &Ta ------------------------------------------------------------------------- */ /* ---------------------------------------------------------------------- */ -int FixLbFluid::adjust_dof_fix() /* Based on same private method in compute class */ -{ /* altered to return fix_dof */ - int fix_dof = 0; +bigint FixLbFluid::adjust_dof_fix() /* Based on same private method in compute class */ +{ /* altered to return fix_dof */ + bigint fix_dof = 0; for (auto &ifix : modify->get_fix_list()) if (ifix->dof_flag) fix_dof += ifix->dof(igroup); return fix_dof; diff --git a/src/LATBOLTZ/fix_lb_fluid.h b/src/LATBOLTZ/fix_lb_fluid.h index 19cd2c6dc3..f134b50901 100644 --- a/src/LATBOLTZ/fix_lb_fluid.h +++ b/src/LATBOLTZ/fix_lb_fluid.h @@ -182,7 +182,7 @@ class FixLbFluid : public Fix { void calc_fluidforceII(void); void calc_fluidforceweight(void); - int adjust_dof_fix(); + bigint adjust_dof_fix(); double dof_compute(); /* nanopit parameters */ diff --git a/src/LEPTON/angle_lepton.cpp b/src/LEPTON/angle_lepton.cpp index 59310f5637..9fe565f8ee 100644 --- a/src/LEPTON/angle_lepton.cpp +++ b/src/LEPTON/angle_lepton.cpp @@ -44,6 +44,7 @@ AngleLepton::AngleLepton(LAMMPS *_lmp) : { writedata = 1; reinitflag = 0; + auto_offset = 1; } /* ---------------------------------------------------------------------- */ @@ -90,10 +91,21 @@ template void AngleLepton::eval() { std::vector angleforce; std::vector anglepot; - for (const auto &expr : expressions) { - auto parsed = Lepton::Parser::parse(LeptonUtils::substitute(expr, lmp)); - angleforce.emplace_back(parsed.differentiate("theta").createCompiledExpression()); - if (EFLAG) anglepot.emplace_back(parsed.createCompiledExpression()); + std::vector has_ref; + try { + for (const auto &expr : expressions) { + auto parsed = Lepton::Parser::parse(LeptonUtils::substitute(expr, lmp)); + 
angleforce.emplace_back(parsed.differentiate("theta").createCompiledExpression()); + has_ref.push_back(true); + try { + angleforce.back().getVariableReference("theta"); + } catch (Lepton::Exception &) { + has_ref.back() = false; + } + if (EFLAG) anglepot.emplace_back(parsed.createCompiledExpression()); + } + } catch (std::exception &e) { + error->all(FLERR, e.what()); } const double *const *const x = atom->x; @@ -142,8 +154,7 @@ template void AngleLepton::eval() const double dtheta = acos(c) - theta0[type]; const int idx = type2expression[type]; - angleforce[idx].getVariableReference("theta") = dtheta; - + if (has_ref[idx]) angleforce[idx].getVariableReference("theta") = dtheta; const double a = -angleforce[idx].evaluate() * s; const double a11 = a * c / rsq1; const double a12 = -a / (r1 * r2); @@ -179,7 +190,11 @@ template void AngleLepton::eval() double eangle = 0.0; if (EFLAG) { - anglepot[idx].getVariableReference("theta") = dtheta; + try { + anglepot[idx].getVariableReference("theta") = dtheta; + } catch (Lepton::Exception &) { + ; // ignore -> constant force + } eangle = anglepot[idx].evaluate() - offset[type]; } if (EVFLAG) @@ -202,6 +217,24 @@ void AngleLepton::allocate() for (int i = 1; i < np1; i++) setflag[i] = 0; } +/* ---------------------------------------------------------------------- + global settings +------------------------------------------------------------------------- */ + +void AngleLepton::settings(int narg, char **arg) +{ + auto_offset = 1; + if (narg > 0) { + if (strcmp(arg[0],"auto_offset") == 0) { + auto_offset = 1; + } else if (strcmp(arg[0],"no_offset") == 0) { + auto_offset = 0; + } else { + error->all(FLERR, "Unknown angle style lepton setting {}", arg[0]); + } + } +} + /* ---------------------------------------------------------------------- set coeffs for one or more types ------------------------------------------------------------------------- */ @@ -224,9 +257,20 @@ void AngleLepton::coeff(int narg, char **arg) auto parsed = 
Lepton::Parser::parse(LeptonUtils::substitute(exp_one, lmp)); auto anglepot = parsed.createCompiledExpression(); auto angleforce = parsed.differentiate("theta").createCompiledExpression(); - anglepot.getVariableReference("theta") = 0.0; - angleforce.getVariableReference("theta") = 0.0; - offset_one = anglepot.evaluate(); + try { + anglepot.getVariableReference("theta") = 0.0; + } catch (Lepton::Exception &) { + if (comm->me == 0) + error->warning(FLERR, "Lepton potential expression {} does not depend on 'theta'", exp_one); + } + try { + angleforce.getVariableReference("theta") = 0.0; + } catch (Lepton::Exception &) { + if (comm->me == 0) + error->warning(FLERR, "Force from Lepton expression {} does not depend on 'theta'", + exp_one); + } + if (auto_offset) offset_one = anglepot.evaluate(); angleforce.evaluate(); } catch (std::exception &e) { error->all(FLERR, e.what()); @@ -284,6 +328,7 @@ void AngleLepton::write_restart(FILE *fp) fwrite(&n, sizeof(int), 1, fp); fwrite(exp.c_str(), sizeof(char), n, fp); } + fwrite(&auto_offset, sizeof(int), 1, fp); } /* ---------------------------------------------------------------------- @@ -323,6 +368,9 @@ void AngleLepton::read_restart(FILE *fp) expressions.emplace_back(buf); } + if (comm->me == 0) utils::sfread(FLERR, &auto_offset, sizeof(int), 1, fp, nullptr, error); + MPI_Bcast(&auto_offset, 1, MPI_INT, 0, world); + delete[] buf; } @@ -363,7 +411,11 @@ double AngleLepton::single(int type, int i1, int i2, int i3) const auto &expr = expressions[type2expression[type]]; auto parsed = Lepton::Parser::parse(LeptonUtils::substitute(expr, lmp)); auto anglepot = parsed.createCompiledExpression(); - anglepot.getVariableReference("theta") = dtheta; + try { + anglepot.getVariableReference("theta") = dtheta; + } catch (Lepton::Exception &) { + ; // ignore -> constant potential + } return anglepot.evaluate() - offset[type]; } diff --git a/src/LEPTON/angle_lepton.h b/src/LEPTON/angle_lepton.h index 67d2718fb6..4f0e5729ed 100644 --- 
a/src/LEPTON/angle_lepton.h +++ b/src/LEPTON/angle_lepton.h @@ -29,6 +29,7 @@ class AngleLepton : public Angle { AngleLepton(class LAMMPS *); ~AngleLepton() override; void compute(int, int) override; + void settings(int, char **) override; void coeff(int, char **) override; double equilibrium_angle(int) override; void write_restart(FILE *) override; @@ -42,6 +43,7 @@ class AngleLepton : public Angle { double *theta0; int *type2expression; double *offset; + int auto_offset; virtual void allocate(); diff --git a/src/LEPTON/bond_lepton.cpp b/src/LEPTON/bond_lepton.cpp index 773607782d..8679d0ed62 100644 --- a/src/LEPTON/bond_lepton.cpp +++ b/src/LEPTON/bond_lepton.cpp @@ -37,6 +37,7 @@ BondLepton::BondLepton(LAMMPS *_lmp) : { writedata = 1; reinitflag = 0; + auto_offset = 1; } /* ---------------------------------------------------------------------- */ @@ -82,10 +83,17 @@ template void BondLepton::eval() { std::vector bondforce; std::vector bondpot; + std::vector has_ref; try { for (const auto &expr : expressions) { auto parsed = Lepton::Parser::parse(LeptonUtils::substitute(expr, lmp)); bondforce.emplace_back(parsed.differentiate("r").createCompiledExpression()); + has_ref.push_back(true); + try { + bondforce.back().getVariableReference("r"); + } catch (Lepton::Exception &) { + has_ref.back() = false; + } if (EFLAG) bondpot.emplace_back(parsed.createCompiledExpression()); } } catch (std::exception &e) { @@ -116,7 +124,7 @@ template void BondLepton::eval() double fbond = 0.0; if (r > 0.0) { - bondforce[idx].getVariableReference("r") = dr; + if (has_ref[idx]) bondforce[idx].getVariableReference("r") = dr; fbond = -bondforce[idx].evaluate() / r; } @@ -136,7 +144,11 @@ template void BondLepton::eval() double ebond = 0.0; if (EFLAG) { - bondpot[idx].getVariableReference("r") = dr; + try { + bondpot[idx].getVariableReference("r") = dr; + } catch (Lepton::Exception &) { + ; // ignore -> constant potential + } ebond = bondpot[idx].evaluate() - offset[type]; } if (EVFLAG) 
ev_tally(i1, i2, nlocal, NEWTON_BOND, ebond, fbond, delx, dely, delz); @@ -157,6 +169,24 @@ void BondLepton::allocate() for (int i = 1; i < np1; i++) setflag[i] = 0; } +/* ---------------------------------------------------------------------- + global settings +------------------------------------------------------------------------- */ + +void BondLepton::settings(int narg, char **arg) +{ + auto_offset = 1; + if (narg > 0) { + if (strcmp(arg[0],"auto_offset") == 0) { + auto_offset = 1; + } else if (strcmp(arg[0],"no_offset") == 0) { + auto_offset = 0; + } else { + error->all(FLERR, "Unknown bond style lepton setting {}", arg[0]); + } + } +} + /* ---------------------------------------------------------------------- set coeffs for one or more types ------------------------------------------------------------------------- */ @@ -179,9 +209,19 @@ void BondLepton::coeff(int narg, char **arg) auto parsed = Lepton::Parser::parse(LeptonUtils::substitute(exp_one, lmp)); auto bondpot = parsed.createCompiledExpression(); auto bondforce = parsed.differentiate("r").createCompiledExpression(); - bondpot.getVariableReference("r") = 0.0; - bondforce.getVariableReference("r") = 0.0; - offset_one = bondpot.evaluate(); + try { + bondpot.getVariableReference("r") = 0.0; + } catch (Lepton::Exception &e) { + if (comm->me == 0) + error->warning(FLERR, "Lepton potential expression {} does not depend on 'r'", exp_one); + } + try { + bondforce.getVariableReference("r") = 0.0; + } catch (Lepton::Exception &e) { + if (comm->me == 0) + error->warning(FLERR, "Force from Lepton expression {} does not depend on 'r'", exp_one); + } + if (auto_offset) offset_one = bondpot.evaluate(); bondforce.evaluate(); } catch (std::exception &e) { error->all(FLERR, e.what()); @@ -239,6 +279,7 @@ void BondLepton::write_restart(FILE *fp) fwrite(&n, sizeof(int), 1, fp); fwrite(exp.c_str(), sizeof(char), n, fp); } + fwrite(&auto_offset, sizeof(int), 1, fp); } /* 
---------------------------------------------------------------------- @@ -278,6 +319,9 @@ void BondLepton::read_restart(FILE *fp) expressions.emplace_back(buf); } + if (comm->me == 0) utils::sfread(FLERR, &auto_offset, sizeof(int), 1, fp, nullptr, error); + MPI_Bcast(&auto_offset, 1, MPI_INT, 0, world); + delete[] buf; } @@ -302,8 +346,12 @@ double BondLepton::single(int type, double rsq, int /*i*/, int /*j*/, double &ff auto parsed = Lepton::Parser::parse(LeptonUtils::substitute(expr, lmp)); auto bondpot = parsed.createCompiledExpression(); auto bondforce = parsed.differentiate("r").createCompiledExpression(); - bondforce.getVariableReference("r") = dr; - bondpot.getVariableReference("r") = dr; + try { + bondpot.getVariableReference("r") = dr; + bondforce.getVariableReference("r") = dr; + } catch (Lepton::Exception &) { + ; // ignore -> constant potential or force + } // force and energy diff --git a/src/LEPTON/bond_lepton.h b/src/LEPTON/bond_lepton.h index 9e693298a7..e59648a3f0 100644 --- a/src/LEPTON/bond_lepton.h +++ b/src/LEPTON/bond_lepton.h @@ -29,6 +29,7 @@ class BondLepton : public Bond { BondLepton(class LAMMPS *); ~BondLepton() override; void compute(int, int) override; + void settings(int, char **) override; void coeff(int, char **) override; double equilibrium_distance(int) override; void write_restart(FILE *) override; @@ -42,6 +43,7 @@ class BondLepton : public Bond { double *r0; int *type2expression; double *offset; + int auto_offset; virtual void allocate(); diff --git a/src/LEPTON/dihedral_lepton.cpp b/src/LEPTON/dihedral_lepton.cpp index 6470e43033..069ff13d74 100644 --- a/src/LEPTON/dihedral_lepton.cpp +++ b/src/LEPTON/dihedral_lepton.cpp @@ -92,10 +92,17 @@ template void DihedralLepton::eval() { std::vector dihedralforce; std::vector dihedralpot; + std::vector has_ref; try { for (const auto &expr : expressions) { auto parsed = Lepton::Parser::parse(LeptonUtils::substitute(expr, lmp)); 
dihedralforce.emplace_back(parsed.differentiate("phi").createCompiledExpression()); + has_ref.push_back(true); + try { + dihedralforce.back().getVariableReference("phi"); + } catch (Lepton::Exception &) { + has_ref.back() = false; + } if (EFLAG) dihedralpot.emplace_back(parsed.createCompiledExpression()); } } catch (std::exception &e) { @@ -278,7 +285,7 @@ template void DihedralLepton::eval() } const int idx = type2expression[type]; - dihedralforce[idx].getVariableReference("phi") = phi; + if (has_ref[idx]) dihedralforce[idx].getVariableReference("phi") = phi; double m_du_dphi = -dihedralforce[idx].evaluate(); // ----- Step 4: Calculate the force direction in real space ----- @@ -322,7 +329,11 @@ template void DihedralLepton::eval() double edihedral = 0.0; if (EFLAG) { - dihedralpot[idx].getVariableReference("phi") = phi; + try { + dihedralpot[idx].getVariableReference("phi") = phi; + } catch (Lepton::Exception &) { + ; // ignore -> constant potential + } edihedral = dihedralpot[idx].evaluate(); } if (EVFLAG) @@ -362,8 +373,18 @@ void DihedralLepton::coeff(int narg, char **arg) auto parsed = Lepton::Parser::parse(LeptonUtils::substitute(exp_one, lmp)); auto dihedralpot = parsed.createCompiledExpression(); auto dihedralforce = parsed.differentiate("phi").createCompiledExpression(); - dihedralpot.getVariableReference("phi") = 0.0; - dihedralforce.getVariableReference("phi") = 0.0; + try { + dihedralpot.getVariableReference("phi") = 0.0; + } catch (Lepton::Exception &) { + if (comm->me == 0) + error->warning(FLERR, "Lepton potential expression {} does not depend on 'phi'", exp_one); + } + try { + dihedralforce.getVariableReference("phi") = 0.0; + } catch (Lepton::Exception &) { + if (comm->me == 0) + error->warning(FLERR, "Force from Lepton expression {} does not depend on 'phi'", exp_one); + } dihedralforce.evaluate(); } catch (std::exception &e) { error->all(FLERR, e.what()); diff --git a/src/LEPTON/fix_wall_lepton.cpp b/src/LEPTON/fix_wall_lepton.cpp index 
a81d3c4edb..7530188c00 100644 --- a/src/LEPTON/fix_wall_lepton.cpp +++ b/src/LEPTON/fix_wall_lepton.cpp @@ -13,6 +13,7 @@ #include "fix_wall_lepton.h" #include "atom.h" +#include "comm.h" #include "error.h" #include "Lepton.h" @@ -41,8 +42,18 @@ void FixWallLepton::post_constructor() auto parsed = Lepton::Parser::parse(LeptonUtils::substitute(exp_one, lmp)); auto wallpot = parsed.createCompiledExpression(); auto wallforce = parsed.differentiate("r").createCompiledExpression(); - wallpot.getVariableReference("r") = 0.0; - wallforce.getVariableReference("r") = 0.0; + try { + wallpot.getVariableReference("r") = 0.0; + } catch (Lepton::Exception &) { + if (comm->me == 0) + error->warning(FLERR, "Lepton potential expression {} does not depend on 'r'", exp_one); + } + try { + wallforce.getVariableReference("r") = 0.0; + } catch (Lepton::Exception &) { + if (comm->me == 0) + error->warning(FLERR, "Force from Lepton expression {} does not depend on 'r'", exp_one); + } wallpot.evaluate(); wallforce.evaluate(); } catch (std::exception &e) { diff --git a/src/LEPTON/pair_lepton.cpp b/src/LEPTON/pair_lepton.cpp index a8af0ce576..adc07cbfa8 100644 --- a/src/LEPTON/pair_lepton.cpp +++ b/src/LEPTON/pair_lepton.cpp @@ -27,6 +27,7 @@ #include "Lepton.h" #include "lepton_utils.h" +#include #include #include @@ -105,11 +106,17 @@ template void PairLepton::eval() std::vector pairforce; std::vector pairpot; + std::vector has_ref; try { for (const auto &expr : expressions) { auto parsed = Lepton::Parser::parse(LeptonUtils::substitute(expr, lmp), functions); pairforce.emplace_back(parsed.differentiate("r").createCompiledExpression()); - pairforce.back().getVariableReference("r"); + has_ref.push_back(true); + try { + pairforce.back().getVariableReference("r"); + } catch (Lepton::Exception &) { + has_ref.back() = false; + } if (EFLAG) pairpot.emplace_back(parsed.createCompiledExpression()); } } catch (std::exception &e) { @@ -142,8 +149,7 @@ template void PairLepton::eval() if (rsq < 
cutsq[itype][jtype]) { const double r = sqrt(rsq); const int idx = type2expression[itype][jtype]; - double &r_for = pairforce[idx].getVariableReference("r"); - r_for = r; + if (has_ref[idx]) pairforce[idx].getVariableReference("r") = r; const double fpair = -pairforce[idx].evaluate() / r * factor_lj; fxtmp += delx * fpair; @@ -157,7 +163,11 @@ template void PairLepton::eval() double evdwl = 0.0; if (EFLAG) { - pairpot[idx].getVariableReference("r") = r; + try { + pairpot[idx].getVariableReference("r") = r; + } catch (Lepton::Exception &) { + ; // ignore -> constant potential + } evdwl = pairpot[idx].evaluate() - offset[itype][jtype]; evdwl *= factor_lj; } @@ -229,8 +239,12 @@ void PairLepton::coeff(int narg, char **arg) auto parsed = Lepton::Parser::parse(LeptonUtils::substitute(exp_one, lmp), functions); auto pairforce = parsed.differentiate("r").createCompiledExpression(); auto pairpot = parsed.createCompiledExpression(); - pairpot.getVariableReference("r") = 1.0; - pairforce.getVariableReference("r") = 1.0; + try { + pairpot.getVariableReference("r") = 1.0; + pairforce.getVariableReference("r") = 1.0; + } catch (Lepton::Exception &) { + ; // ignore -> constant potential or force + } pairpot.evaluate(); pairforce.evaluate(); } catch (std::exception &e) { @@ -270,7 +284,11 @@ double PairLepton::init_one(int i, int j) try { auto expr = LeptonUtils::substitute(expressions[type2expression[i][j]], lmp); auto pairpot = Lepton::Parser::parse(expr, functions).createCompiledExpression(); - pairpot.getVariableReference("r") = cut[i][j]; + try { + pairpot.getVariableReference("r") = cut[i][j]; + } catch (Lepton::Exception &) { + ; // ignore -> constant potential + } offset[i][j] = pairpot.evaluate(); } catch (std::exception &) { } @@ -429,9 +447,12 @@ double PairLepton::single(int /* i */, int /* j */, int itype, int jtype, double auto pairforce = parsed.differentiate("r").createCompiledExpression(); const double r = sqrt(rsq); - pairpot.getVariableReference("r") = r; - 
pairforce.getVariableReference("r") = r; - + try { + pairpot.getVariableReference("r") = r; + pairforce.getVariableReference("r") = r; + } catch (Lepton::Exception &) { + ; // ignore -> constant potential or force + } fforce = -pairforce.evaluate() / r * factor_lj; return (pairpot.evaluate() - offset[itype][jtype]) * factor_lj; } diff --git a/src/LEPTON/pair_lepton_coul.cpp b/src/LEPTON/pair_lepton_coul.cpp index 841565e874..f7d2042874 100644 --- a/src/LEPTON/pair_lepton_coul.cpp +++ b/src/LEPTON/pair_lepton_coul.cpp @@ -28,6 +28,8 @@ #include "Lepton.h" #include "lepton_utils.h" + +#include #include using namespace LAMMPS_NS; @@ -79,25 +81,30 @@ template void PairLeptonCoul::eval() std::vector pairforce; std::vector pairpot; - std::vector> have_q; + std::vector> has_ref; try { for (const auto &expr : expressions) { auto parsed = Lepton::Parser::parse(LeptonUtils::substitute(expr, lmp), functions); pairforce.emplace_back(parsed.differentiate("r").createCompiledExpression()); + has_ref.push_back({true, true, true}); + try { + pairforce.back().getVariableReference("r"); + } catch (Lepton::Exception &) { + has_ref.back()[0] = false; + } if (EFLAG) pairpot.emplace_back(parsed.createCompiledExpression()); - pairforce.back().getVariableReference("r"); - have_q.emplace_back(true, true); // check if there are references to charges + try { pairforce.back().getVariableReference("qi"); - } catch (std::exception &) { - have_q.back().first = false; + } catch (Lepton::Exception &) { + has_ref.back()[1] = false; } try { pairforce.back().getVariableReference("qj"); - } catch (std::exception &) { - have_q.back().second = false; + } catch (Lepton::Exception &) { + has_ref.back()[2] = false; } } } catch (std::exception &e) { @@ -130,9 +137,9 @@ template void PairLeptonCoul::eval() if (rsq < cutsq[itype][jtype]) { const double r = sqrt(rsq); const int idx = type2expression[itype][jtype]; - pairforce[idx].getVariableReference("r") = r; - if (have_q[idx].first) 
pairforce[idx].getVariableReference("qi") = q2e * q[i]; - if (have_q[idx].second) pairforce[idx].getVariableReference("qj") = q2e * q[j]; + if (has_ref[idx][0]) pairforce[idx].getVariableReference("r") = r; + if (has_ref[idx][1]) pairforce[idx].getVariableReference("qi") = q2e * q[i]; + if (has_ref[idx][2]) pairforce[idx].getVariableReference("qj") = q2e * q[j]; const double fpair = -pairforce[idx].evaluate() / r * factor_coul; fxtmp += delx * fpair; @@ -146,9 +153,14 @@ template void PairLeptonCoul::eval() double ecoul = 0.0; if (EFLAG) { - pairpot[idx].getVariableReference("r") = r; - if (have_q[idx].first) pairpot[idx].getVariableReference("qi") = q2e * q[i]; - if (have_q[idx].second) pairpot[idx].getVariableReference("qj") = q2e * q[j]; + try { + pairpot[idx].getVariableReference("r") = r; + } catch (Lepton::Exception &) { + ; // ignore -> constant potential + } + if (has_ref[idx][1]) pairpot[idx].getVariableReference("qi") = q2e * q[i]; + if (has_ref[idx][2]) pairpot[idx].getVariableReference("qj") = q2e * q[j]; + ecoul = pairpot[idx].evaluate(); ecoul *= factor_coul; } @@ -249,18 +261,22 @@ double PairLeptonCoul::single(int i, int j, int itype, int jtype, double rsq, do const double r = sqrt(rsq); const double q2e = sqrt(force->qqrd2e); - pairpot.getVariableReference("r") = r; - pairforce.getVariableReference("r") = r; + try { + pairpot.getVariableReference("r") = r; + pairforce.getVariableReference("r") = r; + } catch (Lepton::Exception &) { + ; // ignore -> constant potential or force + } try { pairpot.getVariableReference("qi") = q2e * atom->q[i]; pairforce.getVariableReference("qi") = q2e * atom->q[i]; - } catch (std::exception &) { + } catch (Lepton::Exception &) { /* ignore */ } try { pairpot.getVariableReference("qj") = q2e * atom->q[j]; pairforce.getVariableReference("qj") = q2e * atom->q[j]; - } catch (std::exception &) { + } catch (Lepton::Exception &) { /* ignore */ } diff --git a/src/LEPTON/pair_lepton_sphere.cpp 
b/src/LEPTON/pair_lepton_sphere.cpp index 29514aed38..72d0e85d0b 100644 --- a/src/LEPTON/pair_lepton_sphere.cpp +++ b/src/LEPTON/pair_lepton_sphere.cpp @@ -28,6 +28,7 @@ #include "Lepton.h" #include "lepton_utils.h" +#include #include using namespace LAMMPS_NS; @@ -77,25 +78,30 @@ template void PairLeptonSphere::eval() std::vector pairforce; std::vector pairpot; - std::vector> have_rad; + std::vector> has_ref; try { for (const auto &expr : expressions) { auto parsed = Lepton::Parser::parse(LeptonUtils::substitute(expr, lmp), functions); pairforce.emplace_back(parsed.differentiate("r").createCompiledExpression()); + has_ref.push_back({true, true, true}); + try { + pairforce.back().getVariableReference("r"); + } catch (Lepton::Exception &) { + has_ref.back()[0] = false; + } if (EFLAG) pairpot.emplace_back(parsed.createCompiledExpression()); - pairforce.back().getVariableReference("r"); - have_rad.emplace_back(true, true); - // check if there are references to charges + // check if there are references to radii + try { pairforce.back().getVariableReference("radi"); - } catch (std::exception &) { - have_rad.back().first = false; + } catch (Lepton::Exception &) { + has_ref.back()[1] = false; } try { pairforce.back().getVariableReference("radj"); - } catch (std::exception &) { - have_rad.back().second = false; + } catch (Lepton::Exception &) { + has_ref.back()[2] = false; } } } catch (std::exception &e) { @@ -128,9 +134,9 @@ template void PairLeptonSphere::eval() if (rsq < cutsq[itype][jtype]) { const double r = sqrt(rsq); const int idx = type2expression[itype][jtype]; - pairforce[idx].getVariableReference("r") = r; - if (have_rad[idx].first) pairforce[idx].getVariableReference("radi") = radius[i]; - if (have_rad[idx].second) pairforce[idx].getVariableReference("radj") = radius[j]; + if (has_ref[idx][0]) pairforce[idx].getVariableReference("r") = r; + if (has_ref[idx][1]) pairforce[idx].getVariableReference("radi") = radius[i]; + if (has_ref[idx][2]) 
pairforce[idx].getVariableReference("radj") = radius[j]; const double fpair = -pairforce[idx].evaluate() / r * factor_lj; fxtmp += delx * fpair; @@ -144,9 +150,14 @@ template void PairLeptonSphere::eval() double evdwl = 0.0; if (EFLAG) { - pairpot[idx].getVariableReference("r") = r; - if (have_rad[idx].first) pairpot[idx].getVariableReference("radi") = radius[i]; - if (have_rad[idx].second) pairpot[idx].getVariableReference("radj") = radius[j]; + try { + pairpot[idx].getVariableReference("r") = r; + } catch (Lepton::Exception &) { + ; // ignore -> constant potential + } + if (has_ref[idx][1]) pairpot[idx].getVariableReference("radi") = radius[i]; + if (has_ref[idx][2]) pairpot[idx].getVariableReference("radj") = radius[j]; + evdwl = pairpot[idx].evaluate(); evdwl *= factor_lj; } @@ -211,19 +222,23 @@ double PairLeptonSphere::single(int i, int j, int itype, int jtype, double rsq, auto pairforce = parsed.differentiate("r").createCompiledExpression(); const double r = sqrt(rsq); - pairpot.getVariableReference("r") = r; - pairforce.getVariableReference("r") = r; + try { + pairpot.getVariableReference("r") = r; + pairforce.getVariableReference("r") = r; + } catch (Lepton::Exception &) { + ; // ignore -> constant potential or force + } try { pairpot.getVariableReference("radi") = atom->radius[i]; pairforce.getVariableReference("radi") = atom->radius[i]; - } catch (std::exception &) { - /* ignore */ + } catch (Lepton::Exception &) { + ; // ignore } try { pairpot.getVariableReference("radj") = atom->radius[j]; pairforce.getVariableReference("radj") = atom->radius[j]; - } catch (std::exception &) { - /* ignore */ + } catch (Lepton::Exception &) { + ; // ignore } fforce = -pairforce.evaluate() / r * factor_lj; diff --git a/src/MAKE/MACHINES/Makefile.bgq b/src/MAKE/MACHINES/Makefile.bgq deleted file mode 100644 index 4baecb9fc3..0000000000 --- a/src/MAKE/MACHINES/Makefile.bgq +++ /dev/null @@ -1,60 +0,0 @@ -# bgq = IBM Blue Gene/Q, multiple compiler options, native MPI, ALCF 
FFTW2 - -SHELL = /bin/bash -.SUFFIXES: .cpp .u - -# --------------------------------------------------------------------- -# build rules and dependencies -# do not edit this section -# select which compiler by editing Makefile.bgq.details - -include ../MAKE/MACHINES/bgq.make.details - -include Makefile.package.settings -include Makefile.package - -EXTRA_INC = $(LMP_INC) $(PKG_INC) $(MPI_INC) $(FFT_INC) $(JPG_INC) $(PKG_SYSINC) -EXTRA_PATH = $(PKG_PATH) $(MPI_PATH) $(FFT_PATH) $(JPG_PATH) $(PKG_SYSPATH) -EXTRA_LIB = $(PKG_LIB) $(MPI_LIB) $(FFT_LIB) $(JPG_LIB) $(PKG_SYSLIB) $(DYN_LIB) -EXTRA_CPP_DEPENDS = $(PKG_CPP_DEPENDS) -EXTRA_LINK_DEPENDS = $(PKG_LINK_DEPENDS) - -# Path to src files - -vpath %.cpp .. -vpath %.h .. - -# Link target - -$(EXE): main.o $(LMPLIB) $(EXTRA_LINK_DEPENDS) - $(LINK) $(LINKFLAGS) main.o $(EXTRA_PATH) $(LMPLINK) $(EXTRA_LIB) $(LIB) -o $@ - $(SIZE) $@ - -# Library targets - -$(ARLIB): $(OBJ) $(EXTRA_LINK_DEPENDS) - @rm -f ../$(ARLIB) - $(ARCHIVE) $(ARFLAGS) ../$(ARLIB) $(OBJ) - @rm -f $(ARLIB) - @ln -s ../$(ARLIB) $(ARLIB) - -$(SHLIB): $(OBJ) $(EXTRA_LINK_DEPENDS) - $(CC) $(CCFLAGS) $(SHFLAGS) $(SHLIBFLAGS) $(EXTRA_PATH) -o ../$(SHLIB) \ - $(OBJ) $(EXTRA_LIB) $(LIB) - @rm -f $(SHLIB) - @ln -s ../$(SHLIB) $(SHLIB) - -# Compilation rules - -%.o:%.cpp - $(CC) $(CCFLAGS) $(SHFLAGS) $(EXTRA_INC) -c $< - -# Individual dependencies - -depend : fastdep.exe $(SRC) - @./fastdep.exe $(EXTRA_INC) -- $^ > .depend || exit 1 - -fastdep.exe: ../DEPEND/fastdep.c - cc -O -o $@ $< - -sinclude .depend diff --git a/src/MAKE/MACHINES/Makefile.xe6 b/src/MAKE/MACHINES/Makefile.xe6 deleted file mode 100644 index 9dbe0ba73e..0000000000 --- a/src/MAKE/MACHINES/Makefile.xe6 +++ /dev/null @@ -1,125 +0,0 @@ -# xe6 = Cray XE6, Cray CC, native MPI, FFTW - -SHELL = /bin/sh -.SUFFIXES: .cpp .d - -# --------------------------------------------------------------------- -# compiler/linker settings -# specify flags and libraries needed for your compiler - -CC = CC -CCFLAGS = 
-fastsse -SHFLAGS = -fPIC -DEPFLAGS = -M - -LINK = CC -LINKFLAGS = -O -LIB = -lstdc++ -SIZE = size - -ARCHIVE = ar -ARFLAGS = -rc -SHLIBFLAGS = -shared - -# --------------------------------------------------------------------- -# LAMMPS-specific settings, all OPTIONAL -# specify settings for LAMMPS features you will use -# if you change any -D setting, do full re-compile after "make clean" - -# LAMMPS ifdef settings -# see possible settings in Section 3.5 of the manual - -LMP_INC = -DLAMMPS_GZIP - -# MPI library -# see discussion in Section 3.4 of the manual -# MPI wrapper compiler/linker can provide this info -# can point to dummy MPI library in src/STUBS as in Makefile.serial -# use -D MPICH and OMPI settings in INC to avoid C++ lib conflicts -# INC = path for mpi.h, MPI compiler settings -# PATH = path for MPI library -# LIB = name of MPI library - -MPI_INC = -DMPICH_SKIP_MPICXX -MPI_PATH = -MPI_LIB = - -# FFT library -# see discussion in Section 3.5.2 of manual -# can be left blank to use provided KISS FFT library -# INC = -DFFT setting, e.g. 
-DFFT_FFTW, FFT compiler settings -# PATH = path for FFT library -# LIB = name of FFT library - -FFT_INC = -FFT_PATH = -FFT_LIB = - -# JPEG and/or PNG library -# see discussion in Section 3.5.4 of manual -# only needed if -DLAMMPS_JPEG or -DLAMMPS_PNG listed with LMP_INC -# INC = path(s) for jpeglib.h and/or png.h -# PATH = path(s) for JPEG library and/or PNG library -# LIB = name(s) of JPEG library and/or PNG library - -JPG_INC = -JPG_PATH = -JPG_LIB = - -# library for loading shared objects (defaults to -ldl, should be empty on Windows) -# uncomment to change the default - -# override DYN_LIB = - -# --------------------------------------------------------------------- -# build rules and dependencies -# do not edit this section - -include Makefile.package.settings -include Makefile.package - -EXTRA_INC = $(LMP_INC) $(PKG_INC) $(MPI_INC) $(FFT_INC) $(JPG_INC) $(PKG_SYSINC) -EXTRA_PATH = $(PKG_PATH) $(MPI_PATH) $(FFT_PATH) $(JPG_PATH) $(PKG_SYSPATH) -EXTRA_LIB = $(PKG_LIB) $(MPI_LIB) $(FFT_LIB) $(JPG_LIB) $(PKG_SYSLIB) $(DYN_LIB) -EXTRA_CPP_DEPENDS = $(PKG_CPP_DEPENDS) -EXTRA_LINK_DEPENDS = $(PKG_LINK_DEPENDS) - -# Path to src files - -vpath %.cpp .. -vpath %.h .. 
- -# Link target - -$(EXE): main.o $(LMPLIB) $(EXTRA_LINK_DEPENDS) - $(LINK) $(LINKFLAGS) main.o $(EXTRA_PATH) $(LMPLINK) $(EXTRA_LIB) $(LIB) -o $@ - $(SIZE) $@ - -# Library targets - -$(ARLIB): $(OBJ) $(EXTRA_LINK_DEPENDS) - @rm -f ../$(ARLIB) - $(ARCHIVE) $(ARFLAGS) ../$(ARLIB) $(OBJ) - @rm -f $(ARLIB) - @ln -s ../$(ARLIB) $(ARLIB) - -$(SHLIB): $(OBJ) $(EXTRA_LINK_DEPENDS) - $(CC) $(CCFLAGS) $(SHFLAGS) $(SHLIBFLAGS) $(EXTRA_PATH) -o ../$(SHLIB) \ - $(OBJ) $(EXTRA_LIB) $(LIB) - @rm -f $(SHLIB) - @ln -s ../$(SHLIB) $(SHLIB) - -# Compilation rules - -%.o:%.cpp - $(CC) $(CCFLAGS) $(SHFLAGS) $(EXTRA_INC) -c $< - -# Individual dependencies - -depend : fastdep.exe $(SRC) - @./fastdep.exe $(EXTRA_INC) -- $^ > .depend || exit 1 - -fastdep.exe: ../DEPEND/fastdep.c - cc -O -o $@ $< - -sinclude .depend diff --git a/src/MAKE/MACHINES/bgq.make.details b/src/MAKE/MACHINES/bgq.make.details deleted file mode 100644 index 0febd69d26..0000000000 --- a/src/MAKE/MACHINES/bgq.make.details +++ /dev/null @@ -1,125 +0,0 @@ -# multiple compiler options for BGQ - -# --------------------------------------------------------------------- -# compiler/linker settings -# specify flags and libraries needed for your compiler - -# uncomment one and only one of the following three lines -# to choose a compiler toolchain - -#COMPILER = GCC -#COMPILER = LLVM -COMPILER = XLC - -ifeq ($(COMPILER),XLC) -CC = /bgsys/drivers/ppcfloor/comm/xl.ndebug/bin/mpixlcxx_r -CCFLAGS = -O3 -qarch=qp -qtune=qp -qsmp=omp -qsimd=auto -qhot=level=2 -qprefetch -qunroll=yes -FC = /bgsys/drivers/ppcfloor/comm/xl.ndebug/bin/mpixlf90_r -FFLAGS = -O3 -qarch=qp -qtune=qp -qsimd=auto -qhot=level=2 -qprefetch -qunroll=yes -qsmp=omp -qextname -qnosave -DEPFLAGS = -M -qmakedep=gcc -endif - -ifeq ($(COMPILER),GCC) -CC = /bgsys/drivers/ppcfloor/comm/gcc.legacy/bin/mpicxx -CCFLAGS = -O3 -fopenmp -FC = /bgsys/drivers/ppcfloor/comm/gcc.legacy/bin/mpif90 -FFLAGS = -O3 -fopenmp -DEPFLAGS = -M -endif - -ifeq ($(COMPILER),LLVM) -#CC = 
bgclang++ -CC = /home/projects/llvm/mpi/bgclang/bin/mpiclang++ -CCFLAGS = -O3 -fopenmp -DEPFLAGS = -M -FC = /bin/false -FFLAGS = LLVM does not have a Fortran front-end! -endif - -LINK = $(CC) -LINKFLAGS = $(CCFLAGS) - -ifeq ($(COMPILER),XLC) - MASS_LIB = ${IBM_MAIN_DIR}/xlmass/bg/7.3/bglib64 - XLF_LIB = ${IBM_MAIN_DIR}/xlf/bg/14.1/bglib64 - XLSMP_LIB = ${IBM_MAIN_DIR}/xlsmp/bg/3.1/bglib64 - LIB += -L${MASS_LIB} -L${XLF_LIB} -L${XLSMP_LIB} - LIB += -lmassv -lmass - LIB += -lxlf90_r -lxlsmp -lxlopt -lxlfmath -lxl -endif - -ifeq ($(COMPILER),GCC) -# libm is definitely slower than libmass... - LIB += -lm -lgfortran -endif - -ifeq ($(COMPILER),LLVM) - SLEEF_DIR = /home/projects/llvm/sleef - LIB += -L${SLEEF_DIR}/lib -lsleef -endif - -SIZE = size - -ARCHIVE = ar -ARFLAGS = -rc - -# BGQ should not use shared libraries - -SHFLAGS = -SHLIBFLAGS = - -# --------------------------------------------------------------------- -# LAMMPS-specific settings, all OPTIONAL -# specify settings for LAMMPS features you will use -# if you change any -D setting, do full re-compile after "make clean" - -# LAMMPS ifdef settings -# see possible settings in Section 3.5 of the manual - -LMP_INC = -DLAMMPS_GZIP - -# MPI library -# see discussion in Section 3.4 of the manual -# MPI wrapper compiler/linker can provide this info -# can point to dummy MPI library in src/STUBS as in Makefile.serial -# use -D MPICH and OMPI settings in INC to avoid C++ lib conflicts -# INC = path for mpi.h, MPI compiler settings -# PATH = path for MPI library -# LIB = name of MPI library - -MPI_INC = -MPI_PATH = -MPI_LIB = - -MPI_INC += -DMPICH_SKIP_MPICXX -MPI_LIB += #/home/jhammond/OSPRI/branches/marpn/wrap/libmpiarbrpn.a - -# FFT library -# see discussion in Section 3.5.2 of manual -# can be left blank to use provided KISS FFT library -# INC = -DFFT setting, e.g. 
-DFFT_FFTW, FFT compiler settings -# PATH = path for FFT library -# LIB = name of FFT library - -FFT_INC = -I/soft/libraries/alcf/current/xl/FFTW2/include -DFFT_FFTW2 -DFFTW_SIZE -FFT_PATH = #/soft/libraries/alcf/current/xl/FFTW2 -FFT_LIB = -L/soft/libraries/alcf/current/xl/FFTW2/lib -ldfftw - -# JPEG and/or PNG library -# see discussion in Section 3.5.4 of manual -# only needed if -DLAMMPS_JPEG or -DLAMMPS_PNG listed with LMP_INC -# INC = path(s) for jpeglib.h and/or png.h -# PATH = path(s) for JPEG library and/or PNG library -# LIB = name(s) of JPEG library and/or PNG library - -JPG_INC = -JPG_PATH = -JPG_LIB = - -depend : fastdep.exe $(SRC) - @./fastdep.exe $(EXTRA_INC) -- $^ > .depend || exit 1 - -fastdep.exe: ../DEPEND/fastdep.c - cc -O -o $@ $< - -sinclude .depend diff --git a/src/MANIFOLD/fix_nve_manifold_rattle.cpp b/src/MANIFOLD/fix_nve_manifold_rattle.cpp index b1efea951f..dc0492dbe9 100644 --- a/src/MANIFOLD/fix_nve_manifold_rattle.cpp +++ b/src/MANIFOLD/fix_nve_manifold_rattle.cpp @@ -287,21 +287,21 @@ void FixNVEManifoldRattle::update_var_params() /* ----------------------------------------------------------------------------- ---------------------------------------------------------------------------*/ -int FixNVEManifoldRattle::dof(int /*igroup*/) +bigint FixNVEManifoldRattle::dof(int /*igroup*/) { int *mask = atom->mask; int nlocal = atom->nlocal; - int natoms = 0; + bigint natoms = 0; for (int i = 0; i < nlocal; ++i) { if (mask[i] & groupbit) ++natoms; } - int dofs; - MPI_Allreduce( &natoms, &dofs, 1, MPI_INT, MPI_SUM, world ); + bigint dofs; + MPI_Allreduce( &natoms, &dofs, 1, MPI_LMP_BIGINT, MPI_SUM, world ); // Make sure that, if there is just no or one atom, no dofs are subtracted, // since for the first atom already 3 dofs are subtracted because of the - // centre of mass corrections: + // center of mass corrections: if (dofs <= 1) dofs = 0; stats.dofs_removed = dofs; diff --git a/src/MANIFOLD/fix_nve_manifold_rattle.h 
b/src/MANIFOLD/fix_nve_manifold_rattle.h index 3eae9c4bc3..7c9e302094 100644 --- a/src/MANIFOLD/fix_nve_manifold_rattle.h +++ b/src/MANIFOLD/fix_nve_manifold_rattle.h @@ -75,7 +75,7 @@ class FixNVEManifoldRattle : public Fix { void init() override; void reset_dt() override; void end_of_step() override; - int dof(int) override; + bigint dof(int) override; void setup(int) override {} // Not needed for fixNVE but is for fixNVT double memory_usage() override; diff --git a/src/ML-SNAP/compute_sna_atom.cpp b/src/ML-SNAP/compute_sna_atom.cpp index da49b15117..b1b4a46482 100644 --- a/src/ML-SNAP/compute_sna_atom.cpp +++ b/src/ML-SNAP/compute_sna_atom.cpp @@ -580,58 +580,56 @@ void ComputeSNAAtom::select3(int k, int n, double *arr, int *iarr, double **arr3 } } -double * ComputeSNAAtom::weights(double * rsq, double rcut, int ncounts) +double *ComputeSNAAtom::weights(double *rsq, double rcut, int ncounts) { - double * w=nullptr; + double *w=nullptr; memory->destroy(w); memory->create(w, ncounts, "snann:gauss_weights"); double rloc=0.; - for (int i=0; i rcut){ - w[i]=0.; - } else { - w[i]=1.; - } + for (int i=0; i rcut){ + w[i]=0.; + } else { + w[i]=1.; } + } return w; } -double * ComputeSNAAtom::tanh_weights(double * rsq, double rcut, double delta, int ncounts) +double *ComputeSNAAtom::tanh_weights(double *rsq, double rcut, double delta, int ncounts) { - double * w=nullptr; + double *w=nullptr; memory->destroy(w); memory->create(w, ncounts, "snann:gauss_weights"); double rloc=0.; - for (int i=0; idestroy(www); } else if (weightmode == 1) { - double * www = tanh_weights(rsq, rcut, delta, ncounts); + double *www = tanh_weights(rsq, rcut, delta, ncounts); S_sol = sum_weights(rsq, www, ncounts); memory->destroy(www); } @@ -639,38 +637,31 @@ double ComputeSNAAtom::get_target_rcut(double S_target, double * rsq, double rcu return err; } -double * ComputeSNAAtom::dichotomie(double S_target, double a, double b, double e, double * rsq, int ncounts, int weightmode, double delta) +double 
*ComputeSNAAtom::dichotomie(double S_target, double a, double b, double e, double *rsq, + int ncounts, int weightmode, double delta) { double d=b-a; - double * sol = nullptr; + double *sol = nullptr; memory->destroy(sol); memory->create(sol, 2, "snann:sol"); - double m=0.; + double m=0.0; - int cnt=0; - do - { - m = ( a + b ) / 2.; - d = fabs( b - a ); - double f_ra = get_target_rcut(S_target, rsq, a, ncounts, weightmode, delta); - double f_rm = get_target_rcut(S_target, rsq, m, ncounts, weightmode, delta); - if (f_rm == 0.) - { - sol[0]=m; - sol[1]=m; - return sol; - } - else if (f_rm*f_ra > 0.) - { - a = m; - } - else - { - b = m; - } - cnt+=1; - } while ( d > e ); + do { + m = (a + b) / 2.0; + d = fabs(b - a); + double f_ra = get_target_rcut(S_target, rsq, a, ncounts, weightmode, delta); + double f_rm = get_target_rcut(S_target, rsq, m, ncounts, weightmode, delta); + if (f_rm == 0.0) { + sol[0]=m; + sol[1]=m; + return sol; + } else if (f_rm*f_ra > 0.0) { + a = m; + } else { + b = m; + } + } while (d > e); sol[0]=a; sol[1]=b; return sol; diff --git a/src/OPENMP/angle_cosine_periodic_omp.cpp b/src/OPENMP/angle_cosine_periodic_omp.cpp index 43b3a54a47..48532c8f6c 100644 --- a/src/OPENMP/angle_cosine_periodic_omp.cpp +++ b/src/OPENMP/angle_cosine_periodic_omp.cpp @@ -140,7 +140,7 @@ void AngleCosinePeriodicOMP::eval(int nfrom, int nto, ThrData * const thr) tn = 1.0; tn_1 = 1.0; tn_2 = 0.0; - un = 1.0; + un = (m==1) ? 
2.0 : 1.0; un_1 = 2.0; un_2 = 0.0; diff --git a/src/OPENMP/angle_lepton_omp.cpp b/src/OPENMP/angle_lepton_omp.cpp index 7e86a9e9bb..f57cf916a2 100644 --- a/src/OPENMP/angle_lepton_omp.cpp +++ b/src/OPENMP/angle_lepton_omp.cpp @@ -91,10 +91,17 @@ void AngleLeptonOMP::eval(int nfrom, int nto, ThrData *const thr) { std::vector angleforce; std::vector anglepot; + std::vector has_ref; try { for (const auto &expr : expressions) { auto parsed = Lepton::Parser::parse(LeptonUtils::substitute(expr, Pointers::lmp)); angleforce.emplace_back(parsed.differentiate("theta").createCompiledExpression()); + has_ref.push_back(true); + try { + angleforce.back().getVariableReference("theta"); + } catch (Lepton::Exception &) { + has_ref.back() = false; + } if (EFLAG) anglepot.emplace_back(parsed.createCompiledExpression()); } } catch (std::exception &e) { @@ -146,8 +153,7 @@ void AngleLeptonOMP::eval(int nfrom, int nto, ThrData *const thr) const double dtheta = acos(c) - theta0[type]; const int idx = type2expression[type]; - angleforce[idx].getVariableReference("theta") = dtheta; - + if (has_ref[idx]) angleforce[idx].getVariableReference("theta") = dtheta; const double a = -angleforce[idx].evaluate() * s; const double a11 = a * c / rsq1; const double a12 = -a / (r1 * r2); @@ -183,7 +189,11 @@ void AngleLeptonOMP::eval(int nfrom, int nto, ThrData *const thr) double eangle = 0.0; if (EFLAG) { - anglepot[idx].getVariableReference("theta") = dtheta; + try { + anglepot[idx].getVariableReference("theta") = dtheta; + } catch (Lepton::Exception &) { + ; // ignore -> constant force + } eangle = anglepot[idx].evaluate() - offset[type]; } if (EVFLAG) diff --git a/src/OPENMP/bond_lepton_omp.cpp b/src/OPENMP/bond_lepton_omp.cpp index 0029062366..d9982b08f8 100644 --- a/src/OPENMP/bond_lepton_omp.cpp +++ b/src/OPENMP/bond_lepton_omp.cpp @@ -89,10 +89,17 @@ void BondLeptonOMP::eval(int nfrom, int nto, ThrData *const thr) { std::vector bondforce; std::vector bondpot; + std::vector has_ref; try { for 
(const auto &expr : expressions) { auto parsed = Lepton::Parser::parse(LeptonUtils::substitute(expr, Pointers::lmp)); bondforce.emplace_back(parsed.differentiate("r").createCompiledExpression()); + has_ref.push_back(true); + try { + bondforce.back().getVariableReference("r"); + } catch (Lepton::Exception &) { + has_ref.back() = false; + } if (EFLAG) bondpot.emplace_back(parsed.createCompiledExpression()); } } catch (std::exception &e) { @@ -122,7 +129,7 @@ void BondLeptonOMP::eval(int nfrom, int nto, ThrData *const thr) double fbond = 0.0; if (r > 0.0) { - bondforce[idx].getVariableReference("r") = dr; + if (has_ref[idx]) bondforce[idx].getVariableReference("r") = dr; fbond = -bondforce[idx].evaluate() / r; } @@ -142,7 +149,11 @@ void BondLeptonOMP::eval(int nfrom, int nto, ThrData *const thr) double ebond = 0.0; if (EFLAG) { - bondpot[idx].getVariableReference("r") = dr; + try { + bondpot[idx].getVariableReference("r") = dr; + } catch (Lepton::Exception &) { + ; // ignore -> constant potential + } ebond = bondpot[idx].evaluate() - offset[type]; } if (EVFLAG) diff --git a/src/OPENMP/dihedral_lepton_omp.cpp b/src/OPENMP/dihedral_lepton_omp.cpp index 13a1328058..37748ce9d5 100644 --- a/src/OPENMP/dihedral_lepton_omp.cpp +++ b/src/OPENMP/dihedral_lepton_omp.cpp @@ -19,9 +19,9 @@ #include "atom.h" #include "comm.h" #include "force.h" +#include "math_extra.h" #include "neighbor.h" #include "suffix.h" -#include "math_extra.h" #include @@ -94,10 +94,17 @@ void DihedralLeptonOMP::eval(int nfrom, int nto, ThrData *const thr) { std::vector dihedralforce; std::vector dihedralpot; + std::vector has_ref; try { for (const auto &expr : expressions) { auto parsed = Lepton::Parser::parse(LeptonUtils::substitute(expr, Pointers::lmp)); dihedralforce.emplace_back(parsed.differentiate("phi").createCompiledExpression()); + has_ref.push_back(true); + try { + dihedralforce.back().getVariableReference("phi"); + } catch (Lepton::Exception &) { + has_ref.back() = false; + } if (EFLAG) 
dihedralpot.emplace_back(parsed.createCompiledExpression()); } } catch (std::exception &e) { @@ -106,7 +113,7 @@ void DihedralLeptonOMP::eval(int nfrom, int nto, ThrData *const thr) const double *const *const x = atom->x; auto *_noalias const f = (dbl3_t *) thr->get_f()[0]; - const int * const * const dihedrallist = neighbor->dihedrallist; + const int *const *const dihedrallist = neighbor->dihedrallist; const int nlocal = atom->nlocal; // The dihedral angle "phi" is the angle between n123 and n234 @@ -279,7 +286,7 @@ void DihedralLeptonOMP::eval(int nfrom, int nto, ThrData *const thr) } const int idx = type2expression[type]; - dihedralforce[idx].getVariableReference("phi") = phi; + if (has_ref[idx]) dihedralforce[idx].getVariableReference("phi") = phi; double m_du_dphi = -dihedralforce[idx].evaluate(); // ----- Step 4: Calculate the force direction in real space ----- @@ -323,7 +330,11 @@ void DihedralLeptonOMP::eval(int nfrom, int nto, ThrData *const thr) double edihedral = 0.0; if (EFLAG) { - dihedralpot[idx].getVariableReference("phi") = phi; + try { + dihedralpot[idx].getVariableReference("phi") = phi; + } catch (Lepton::Exception &) { + ; // ignore -> constant potential + } edihedral = dihedralpot[idx].evaluate(); } if (EVFLAG) diff --git a/src/OPENMP/npair_respa_nsq_omp.h b/src/OPENMP/npair_respa_nsq_omp.h index 810931674c..c68d06b4b5 100644 --- a/src/OPENMP/npair_respa_nsq_omp.h +++ b/src/OPENMP/npair_respa_nsq_omp.h @@ -15,7 +15,7 @@ // clang-format off typedef NPairRespaNsqOmp<0,0> NPairHalfRespaNsqNewtoffOmp; NPairStyle(half/respa/nsq/newtoff/omp, - NPairHalfRespaNsqNewtoff, + NPairHalfRespaNsqNewtoffOmp, NP_HALF | NP_RESPA | NP_NSQ | NP_OMP | NP_NEWTOFF | NP_ORTHO | NP_TRI); typedef NPairRespaNsqOmp<1,0> NPairHalfRespaNsqNewtonOmp; diff --git a/src/OPENMP/pair_lepton_coul_omp.cpp b/src/OPENMP/pair_lepton_coul_omp.cpp index bc34bc00af..532c16d797 100644 --- a/src/OPENMP/pair_lepton_coul_omp.cpp +++ b/src/OPENMP/pair_lepton_coul_omp.cpp @@ -20,11 +20,13 @@ 
#include "neigh_list.h" #include "suffix.h" -#include - #include "Lepton.h" #include "lepton_utils.h" #include "omp_compat.h" + +#include +#include + using namespace LAMMPS_NS; /* ---------------------------------------------------------------------- */ @@ -101,25 +103,30 @@ void PairLeptonCoulOMP::eval(int iifrom, int iito, ThrData *const thr) std::vector pairforce; std::vector pairpot; - std::vector> have_q; + std::vector> has_ref; try { for (const auto &expr : expressions) { auto parsed = Lepton::Parser::parse(LeptonUtils::substitute(expr, Pointers::lmp), functions); pairforce.emplace_back(parsed.differentiate("r").createCompiledExpression()); + has_ref.push_back({true, true, true}); + try { + pairforce.back().getVariableReference("r"); + } catch (Lepton::Exception &) { + has_ref.back()[0] = false; + } if (EFLAG) pairpot.emplace_back(parsed.createCompiledExpression()); - pairforce.back().getVariableReference("r"); - have_q.emplace_back(true, true); // check if there are references to charges + try { pairforce.back().getVariableReference("qi"); - } catch (std::exception &) { - have_q.back().first = false; + } catch (Lepton::Exception &) { + has_ref.back()[1] = false; } try { pairforce.back().getVariableReference("qj"); - } catch (std::exception &) { - have_q.back().second = false; + } catch (Lepton::Exception &) { + has_ref.back()[2] = false; } } } catch (std::exception &e) { @@ -152,9 +159,9 @@ void PairLeptonCoulOMP::eval(int iifrom, int iito, ThrData *const thr) if (rsq < cutsq[itype][jtype]) { const double r = sqrt(rsq); const int idx = type2expression[itype][jtype]; - pairforce[idx].getVariableReference("r") = r; - if (have_q[idx].first) pairforce[idx].getVariableReference("qi") = q2e * q[i]; - if (have_q[idx].second) pairforce[idx].getVariableReference("qj") = q2e * q[j]; + if (has_ref[idx][0]) pairforce[idx].getVariableReference("r") = r; + if (has_ref[idx][1]) pairforce[idx].getVariableReference("qi") = q2e * q[i]; + if (has_ref[idx][2]) 
pairforce[idx].getVariableReference("qj") = q2e * q[j]; const double fpair = -pairforce[idx].evaluate() / r * factor_coul; fxtmp += delx * fpair; @@ -168,9 +175,14 @@ void PairLeptonCoulOMP::eval(int iifrom, int iito, ThrData *const thr) double ecoul = 0.0; if (EFLAG) { - pairpot[idx].getVariableReference("r") = r; - if (have_q[idx].first) pairpot[idx].getVariableReference("qi") = q2e * q[i]; - if (have_q[idx].second) pairpot[idx].getVariableReference("qj") = q2e * q[j]; + try { + pairpot[idx].getVariableReference("r") = r; + } catch (Lepton::Exception &) { + ; // ignore -> constant potential + } + if (has_ref[idx][1]) pairpot[idx].getVariableReference("qi") = q2e * q[i]; + if (has_ref[idx][2]) pairpot[idx].getVariableReference("qj") = q2e * q[j]; + ecoul = pairpot[idx].evaluate(); ecoul *= factor_coul; } diff --git a/src/OPENMP/pair_lepton_omp.cpp b/src/OPENMP/pair_lepton_omp.cpp index b57b0fe11e..58692e52d6 100644 --- a/src/OPENMP/pair_lepton_omp.cpp +++ b/src/OPENMP/pair_lepton_omp.cpp @@ -20,11 +20,12 @@ #include "neigh_list.h" #include "suffix.h" -#include - #include "Lepton.h" #include "lepton_utils.h" #include "omp_compat.h" +#include +#include + using namespace LAMMPS_NS; /* ---------------------------------------------------------------------- */ @@ -96,10 +97,17 @@ void PairLeptonOMP::eval(int iifrom, int iito, ThrData *const thr) std::vector pairforce; std::vector pairpot; + std::vector have_ref; try { for (const auto &expr : expressions) { auto parsed = Lepton::Parser::parse(LeptonUtils::substitute(expr, Pointers::lmp), functions); pairforce.emplace_back(parsed.differentiate("r").createCompiledExpression()); + have_ref.push_back(true); + try { + pairforce.back().getVariableReference("r"); + } catch (Lepton::Exception &) { + have_ref.back() = false; + } if (EFLAG) pairpot.emplace_back(parsed.createCompiledExpression()); } } catch (std::exception &e) { @@ -132,7 +140,7 @@ void PairLeptonOMP::eval(int iifrom, int iito, ThrData *const thr) if (rsq < 
cutsq[itype][jtype]) { const double r = sqrt(rsq); const int idx = type2expression[itype][jtype]; - pairforce[idx].getVariableReference("r") = r; + if (have_ref[idx]) pairforce[idx].getVariableReference("r") = r; const double fpair = -pairforce[idx].evaluate() / r * factor_lj; fxtmp += delx * fpair; @@ -146,7 +154,11 @@ void PairLeptonOMP::eval(int iifrom, int iito, ThrData *const thr) double evdwl = 0.0; if (EFLAG) { - pairpot[idx].getVariableReference("r") = r; + try { + pairpot[idx].getVariableReference("r") = r; + } catch (Lepton::Exception &) { + ; // ignore -> constant potential + } evdwl = pairpot[idx].evaluate() - offset[itype][jtype]; evdwl *= factor_lj; } diff --git a/src/OPENMP/pair_lepton_sphere_omp.cpp b/src/OPENMP/pair_lepton_sphere_omp.cpp index 6d3a4827b3..79afe27717 100644 --- a/src/OPENMP/pair_lepton_sphere_omp.cpp +++ b/src/OPENMP/pair_lepton_sphere_omp.cpp @@ -20,11 +20,13 @@ #include "neigh_list.h" #include "suffix.h" -#include - #include "Lepton.h" #include "lepton_utils.h" #include "omp_compat.h" + +#include +#include + using namespace LAMMPS_NS; /* ---------------------------------------------------------------------- */ @@ -99,25 +101,30 @@ void PairLeptonSphereOMP::eval(int iifrom, int iito, ThrData *const thr) std::vector pairforce; std::vector pairpot; - std::vector> have_rad; + std::vector> has_ref; try { for (const auto &expr : expressions) { auto parsed = Lepton::Parser::parse(LeptonUtils::substitute(expr, Pointers::lmp), functions); pairforce.emplace_back(parsed.differentiate("r").createCompiledExpression()); + has_ref.push_back({true, true, true}); + try { + pairforce.back().getVariableReference("r"); + } catch (Lepton::Exception &) { + has_ref.back()[0] = false; + } if (EFLAG) pairpot.emplace_back(parsed.createCompiledExpression()); - pairforce.back().getVariableReference("r"); - have_rad.emplace_back(true, true); - // check if there are references to charges + // check if there are references to radii + try { 
pairforce.back().getVariableReference("radi"); - } catch (std::exception &) { - have_rad.back().first = false; + } catch (Lepton::Exception &) { + has_ref.back()[1] = false; } try { pairforce.back().getVariableReference("radj"); - } catch (std::exception &) { - have_rad.back().second = false; + } catch (Lepton::Exception &) { + has_ref.back()[2] = false; } } } catch (std::exception &e) { @@ -150,9 +157,9 @@ void PairLeptonSphereOMP::eval(int iifrom, int iito, ThrData *const thr) if (rsq < cutsq[itype][jtype]) { const double r = sqrt(rsq); const int idx = type2expression[itype][jtype]; - pairforce[idx].getVariableReference("r") = r; - if (have_rad[idx].first) pairforce[idx].getVariableReference("radi") = radius[i]; - if (have_rad[idx].second) pairforce[idx].getVariableReference("radj") = radius[j]; + if (has_ref[idx][0]) pairforce[idx].getVariableReference("r") = r; + if (has_ref[idx][1]) pairforce[idx].getVariableReference("radi") = radius[i]; + if (has_ref[idx][2]) pairforce[idx].getVariableReference("radj") = radius[j]; const double fpair = -pairforce[idx].evaluate() / r * factor_lj; fxtmp += delx * fpair; @@ -166,9 +173,14 @@ void PairLeptonSphereOMP::eval(int iifrom, int iito, ThrData *const thr) double evdwl = 0.0; if (EFLAG) { - pairpot[idx].getVariableReference("r") = r; - if (have_rad[idx].first) pairpot[idx].getVariableReference("radi") = radius[i]; - if (have_rad[idx].second) pairpot[idx].getVariableReference("radj") = radius[j]; + try { + pairpot[idx].getVariableReference("r") = r; + } catch (Lepton::Exception &) { + ; // ignore -> constant potential + } + if (has_ref[idx][1]) pairpot[idx].getVariableReference("radi") = radius[i]; + if (has_ref[idx][2]) pairpot[idx].getVariableReference("radj") = radius[j]; + evdwl = pairpot[idx].evaluate(); evdwl *= factor_lj; } diff --git a/src/POEMS/fix_poems.cpp b/src/POEMS/fix_poems.cpp index f289a939e6..55199a7191 100644 --- a/src/POEMS/fix_poems.cpp +++ b/src/POEMS/fix_poems.cpp @@ -855,7 +855,7 @@ void 
FixPOEMS::pre_neighbor() {} count # of degrees-of-freedom removed by fix_poems for atoms in igroup ------------------------------------------------------------------------- */ -int FixPOEMS::dof(int igroup) +bigint FixPOEMS::dof(int igroup) { int groupbit = group->bitmask[igroup]; @@ -877,17 +877,17 @@ int FixPOEMS::dof(int igroup) // remove 3N - 6 dof for each rigid body if at least 2 atoms are in igroup - int n = 0; + bigint n = 0; for (int ibody = 0; ibody < nbody; ibody++) if (nall[ibody] > 2) n += 3 * nall[ibody] - 6; // subtract 3 additional dof for each joint if atom is also in igroup - int m = 0; + bigint m = 0; for (int i = 0; i < nlocal; i++) if (natom2body[i] > 1 && (mask[i] & groupbit)) m += 3 * (natom2body[i] - 1); - int mall; - MPI_Allreduce(&m, &mall, 1, MPI_INT, MPI_SUM, world); + bigint mall; + MPI_Allreduce(&m, &mall, 1, MPI_LMP_BIGINT, MPI_SUM, world); n += mall; // delete local memory diff --git a/src/POEMS/fix_poems.h b/src/POEMS/fix_poems.h index 99af171636..6aac4abd8a 100644 --- a/src/POEMS/fix_poems.h +++ b/src/POEMS/fix_poems.h @@ -47,7 +47,7 @@ class FixPOEMS : public Fix { double memory_usage() override; void pre_neighbor() override; - int dof(int) override; + bigint dof(int) override; void deform(int) override; int modify_param(int, char **) override; void reset_dt() override; diff --git a/src/QEQ/fix_qeq.cpp b/src/QEQ/fix_qeq.cpp index b60438b7c8..22632cf786 100644 --- a/src/QEQ/fix_qeq.cpp +++ b/src/QEQ/fix_qeq.cpp @@ -338,12 +338,6 @@ void FixQEq::setup_pre_force(int vflag) if (force->newton_pair == 0) error->all(FLERR,"QEQ with 'newton pair off' not supported"); - if (force->pair) { - if (force->pair->suffix_flag & (Suffix::INTEL|Suffix::GPU)) - error->all(FLERR,"QEQ is not compatiple with suffix version " - "of pair style"); - } - deallocate_storage(); allocate_storage(); diff --git a/src/REACTION/README b/src/REACTION/README index 99a5d604ec..b9199d6d47 100644 --- a/src/REACTION/README +++ b/src/REACTION/README @@ -25,4 +25,5 @@ 
The REACTER methodology is detailed in: https://doi.org/10.1021/acs.macromol.0c02012 This package was created by Jacob Gissinger -(jacob.r.gissinger@gmail.com) at the NASA Langley Research Center. +(jgissing@stevens.edu) while at the NASA Langley Research Center +and Stevens Institute of Technology. diff --git a/src/REACTION/fix_bond_react.cpp b/src/REACTION/fix_bond_react.cpp index d124b06dc2..786f5bfe6e 100644 --- a/src/REACTION/fix_bond_react.cpp +++ b/src/REACTION/fix_bond_react.cpp @@ -13,7 +13,7 @@ See the README file in the top-level LAMMPS directory. ------------------------------------------------------------------------- */ /* ---------------------------------------------------------------------- -Contributing Author: Jacob Gissinger (jacob.r.gissinger@gmail.com) +Contributing Author: Jacob Gissinger (jgissing@stevens.edu) ------------------------------------------------------------------------- */ #include "fix_bond_react.h" @@ -670,15 +670,6 @@ FixBondReact::~FixBondReact() memory->destroy(ghostly_rxn_count); memory->destroy(reaction_count_total); - if (newton_bond == 0) { - memory->destroy(xspecial); - memory->destroy(nxspecial); - memory->destroy(onemol_xspecial); - memory->destroy(onemol_nxspecial); - memory->destroy(twomol_xspecial); - memory->destroy(twomol_nxspecial); - } - if (attempted_rxn == 1) { memory->destroy(restore_pt); memory->destroy(restore); @@ -827,11 +818,10 @@ void FixBondReact::init() nlevels_respa = (dynamic_cast(update->integrate))->nlevels; // check cutoff for iatomtype,jatomtype - for (int i = 0; i < nreacts; i++) { - if (!utils::strmatch(force->pair_style,"^hybrid")) - if (force->pair == nullptr || cutsq[i][1] > force->pair->cutsq[iatomtype[i]][jatomtype[i]]) + if (!utils::strmatch(force->pair_style,"^hybrid")) + for (int i = 0; i < nreacts; i++) + if (force->pair == nullptr || (closeneigh[i] < 0 && cutsq[i][1] > force->pair->cutsq[iatomtype[i]][jatomtype[i]])) error->all(FLERR,"Fix bond/react: Fix bond/react cutoff is longer 
than pairwise cutoff"); - } // need a half neighbor list, built every Nevery steps neighbor->add_request(this, NeighConst::REQ_OCCASIONAL); @@ -931,29 +921,10 @@ void FixBondReact::post_integrate() neighbor->build_one(list,1); - // here we define a full special list, independent of Newton setting - if (newton_bond == 1) { - nxspecial = atom->nspecial; - xspecial = atom->special; - } else { - int nall = atom->nlocal + atom->nghost; - memory->destroy(nxspecial); - memory->destroy(xspecial); - memory->create(nxspecial,nall,3,"bond/react:nxspecial"); - memory->create(xspecial,nall,atom->maxspecial,"bond/react:xspecial"); - for (int i = 0; i < atom->nlocal; i++) { - nxspecial[i][0] = atom->num_bond[i]; - for (int j = 0; j < nxspecial[i][0]; j++) { - xspecial[i][j] = atom->bond_atom[i][j]; - } - nxspecial[i][1] = atom->nspecial[i][1]; - nxspecial[i][2] = atom->nspecial[i][2]; - int joffset = nxspecial[i][0] - atom->nspecial[i][0]; - for (int j = nxspecial[i][0]; j < nxspecial[i][2]; j++) { - xspecial[i][j+joffset] = atom->special[i][j]; - } - } - } + // here we define a full special list + // may need correction for unusual special bond settings + nxspecial = atom->nspecial; + xspecial = atom->special; int j; for (rxnID = 0; rxnID < nreacts; rxnID++) { @@ -2541,49 +2512,15 @@ int FixBondReact::get_chirality(double four_coords[12]) /* ---------------------------------------------------------------------- Get xspecials for current molecule templates + may need correction when specials defined explicitly in molecule templates ------------------------------------------------------------------------- */ void FixBondReact::get_molxspecials() { - if (newton_bond == 1) { - onemol_nxspecial = onemol->nspecial; - onemol_xspecial = onemol->special; - twomol_nxspecial = twomol->nspecial; - twomol_xspecial = twomol->special; - } else { - memory->destroy(onemol_nxspecial); - memory->destroy(onemol_xspecial); - 
memory->create(onemol_nxspecial,onemol->natoms,3,"bond/react:onemol_nxspecial"); - memory->create(onemol_xspecial,onemol->natoms,atom->maxspecial,"bond/react:onemol_xspecial"); - for (int i = 0; i < onemol->natoms; i++) { - onemol_nxspecial[i][0] = onemol->num_bond[i]; - for (int j = 0; j < onemol_nxspecial[i][0]; j++) { - onemol_xspecial[i][j] = onemol->bond_atom[i][j]; - } - onemol_nxspecial[i][1] = onemol->nspecial[i][1]; - onemol_nxspecial[i][2] = onemol->nspecial[i][2]; - int joffset = onemol_nxspecial[i][0] - onemol->nspecial[i][0]; - for (int j = onemol_nxspecial[i][0]; j < onemol_nxspecial[i][2]; j++) { - onemol_xspecial[i][j+joffset] = onemol->special[i][j]; - } - } - memory->destroy(twomol_nxspecial); - memory->destroy(twomol_xspecial); - memory->create(twomol_nxspecial,twomol->natoms,3,"bond/react:twomol_nxspecial"); - memory->create(twomol_xspecial,twomol->natoms,atom->maxspecial,"bond/react:twomol_xspecial"); - for (int i = 0; i < twomol->natoms; i++) { - twomol_nxspecial[i][0] = twomol->num_bond[i]; - for (int j = 0; j < twomol_nxspecial[i][0]; j++) { - twomol_xspecial[i][j] = twomol->bond_atom[i][j]; - } - twomol_nxspecial[i][1] = twomol->nspecial[i][1]; - twomol_nxspecial[i][2] = twomol->nspecial[i][2]; - int joffset = twomol_nxspecial[i][0] - twomol->nspecial[i][0]; - for (int j = twomol_nxspecial[i][0]; j < twomol_nxspecial[i][2]; j++) { - twomol_xspecial[i][j+joffset] = twomol->special[i][j]; - } - } - } + onemol_nxspecial = onemol->nspecial; + onemol_xspecial = onemol->special; + twomol_nxspecial = twomol->nspecial; + twomol_xspecial = twomol->special; } /* ---------------------------------------------------------------------- @@ -2682,16 +2619,43 @@ void FixBondReact::find_landlocked_atoms(int myrxn) } // also, if atoms change number of bonds, but aren't landlocked, that could be bad + int warnflag = 0; if (comm->me == 0) for (int i = 0; i < twomol->natoms; i++) { if ((create_atoms[i][myrxn] == 0) && (twomol_nxspecial[i][0] != 
onemol_nxspecial[equivalences[i][1][myrxn]-1][0]) && - (landlocked_atoms[i][myrxn] == 0)) - error->warning(FLERR, "Fix bond/react: Atom affected by reaction {} is too close " - "to template edge",rxn_name[myrxn]); - break; + (landlocked_atoms[i][myrxn] == 0)) { + warnflag = 1; + break; + } } + // also, if an atom changes any of its bonds, but is not landlocked, that could be bad + int thereflag; + if (comm->me == 0) + for (int i = 0; i < twomol->natoms; i++) { + if (landlocked_atoms[i][myrxn] == 1) continue; + for (int j = 0; j < twomol_nxspecial[i][0]; j++) { + int oneneighID = equivalences[twomol_xspecial[i][j]-1][1][myrxn]; + int ii = equivalences[i][1][myrxn] - 1; + thereflag = 0; + for (int k = 0; k < onemol_nxspecial[ii][0]; k++) { + if (oneneighID == onemol_xspecial[ii][k]) { + thereflag = 1; + break; + } + } + if (thereflag == 0) { + warnflag = 1; + break; + } + } + if (warnflag == 1) break; + } + + if (comm->me == 0 && warnflag == 1) error->warning(FLERR, "Fix bond/react: Atom affected " + "by reaction {} is too close to template edge",rxn_name[myrxn]); + // finally, if a created atom is not landlocked, bad! for (int i = 0; i < twomol->natoms; i++) { if (create_atoms[i][myrxn] == 1 && landlocked_atoms[i][myrxn] == 0) { @@ -3349,7 +3313,7 @@ void FixBondReact::update_everything() dynamic_cast(ihistory)->clear_cache(); // Angles! First let's delete all angle info: - if (force->angle && twomol->angleflag) { + if (force->angle) { int *num_angle = atom->num_angle; int **angle_type = atom->angle_type; tagint **angle_atom1 = atom->angle_atom1; @@ -3390,33 +3354,35 @@ void FixBondReact::update_everything() } } // now let's add the new angle info. 
- for (int j = 0; j < twomol->natoms; j++) { - int jj = equivalences[j][1][rxnID]-1; - if (atom->map(update_mega_glove[jj+1][i]) < nlocal && atom->map(update_mega_glove[jj+1][i]) >= 0) { - if (landlocked_atoms[j][rxnID] == 1) { - num_angle[atom->map(update_mega_glove[jj+1][i])] = twomol->num_angle[j]; - delta_angle += twomol->num_angle[j]; - for (int p = 0; p < twomol->num_angle[j]; p++) { - angle_type[atom->map(update_mega_glove[jj+1][i])][p] = twomol->angle_type[j][p]; - angle_atom1[atom->map(update_mega_glove[jj+1][i])][p] = update_mega_glove[equivalences[twomol->angle_atom1[j][p]-1][1][rxnID]][i]; - angle_atom2[atom->map(update_mega_glove[jj+1][i])][p] = update_mega_glove[equivalences[twomol->angle_atom2[j][p]-1][1][rxnID]][i]; - angle_atom3[atom->map(update_mega_glove[jj+1][i])][p] = update_mega_glove[equivalences[twomol->angle_atom3[j][p]-1][1][rxnID]][i]; + if (twomol->angleflag) { + for (int j = 0; j < twomol->natoms; j++) { + int jj = equivalences[j][1][rxnID]-1; + if (atom->map(update_mega_glove[jj+1][i]) < nlocal && atom->map(update_mega_glove[jj+1][i]) >= 0) { + if (landlocked_atoms[j][rxnID] == 1) { + num_angle[atom->map(update_mega_glove[jj+1][i])] = twomol->num_angle[j]; + delta_angle += twomol->num_angle[j]; + for (int p = 0; p < twomol->num_angle[j]; p++) { + angle_type[atom->map(update_mega_glove[jj+1][i])][p] = twomol->angle_type[j][p]; + angle_atom1[atom->map(update_mega_glove[jj+1][i])][p] = update_mega_glove[equivalences[twomol->angle_atom1[j][p]-1][1][rxnID]][i]; + angle_atom2[atom->map(update_mega_glove[jj+1][i])][p] = update_mega_glove[equivalences[twomol->angle_atom2[j][p]-1][1][rxnID]][i]; + angle_atom3[atom->map(update_mega_glove[jj+1][i])][p] = update_mega_glove[equivalences[twomol->angle_atom3[j][p]-1][1][rxnID]][i]; + } } - } - if (landlocked_atoms[j][rxnID] == 0) { - for (int p = 0; p < twomol->num_angle[j]; p++) { - if (landlocked_atoms[twomol->angle_atom1[j][p]-1][rxnID] == 1 || - 
landlocked_atoms[twomol->angle_atom2[j][p]-1][rxnID] == 1 || - landlocked_atoms[twomol->angle_atom3[j][p]-1][rxnID] == 1) { - insert_num = num_angle[atom->map(update_mega_glove[jj+1][i])]; - angle_type[atom->map(update_mega_glove[jj+1][i])][insert_num] = twomol->angle_type[j][p]; - angle_atom1[atom->map(update_mega_glove[jj+1][i])][insert_num] = update_mega_glove[equivalences[twomol->angle_atom1[j][p]-1][1][rxnID]][i]; - angle_atom2[atom->map(update_mega_glove[jj+1][i])][insert_num] = update_mega_glove[equivalences[twomol->angle_atom2[j][p]-1][1][rxnID]][i]; - angle_atom3[atom->map(update_mega_glove[jj+1][i])][insert_num] = update_mega_glove[equivalences[twomol->angle_atom3[j][p]-1][1][rxnID]][i]; - num_angle[atom->map(update_mega_glove[jj+1][i])]++; - if (num_angle[atom->map(update_mega_glove[jj+1][i])] > atom->angle_per_atom) - error->one(FLERR,"Fix bond/react topology/atom exceed system topology/atom"); - delta_angle++; + if (landlocked_atoms[j][rxnID] == 0) { + for (int p = 0; p < twomol->num_angle[j]; p++) { + if (landlocked_atoms[twomol->angle_atom1[j][p]-1][rxnID] == 1 || + landlocked_atoms[twomol->angle_atom2[j][p]-1][rxnID] == 1 || + landlocked_atoms[twomol->angle_atom3[j][p]-1][rxnID] == 1) { + insert_num = num_angle[atom->map(update_mega_glove[jj+1][i])]; + angle_type[atom->map(update_mega_glove[jj+1][i])][insert_num] = twomol->angle_type[j][p]; + angle_atom1[atom->map(update_mega_glove[jj+1][i])][insert_num] = update_mega_glove[equivalences[twomol->angle_atom1[j][p]-1][1][rxnID]][i]; + angle_atom2[atom->map(update_mega_glove[jj+1][i])][insert_num] = update_mega_glove[equivalences[twomol->angle_atom2[j][p]-1][1][rxnID]][i]; + angle_atom3[atom->map(update_mega_glove[jj+1][i])][insert_num] = update_mega_glove[equivalences[twomol->angle_atom3[j][p]-1][1][rxnID]][i]; + num_angle[atom->map(update_mega_glove[jj+1][i])]++; + if (num_angle[atom->map(update_mega_glove[jj+1][i])] > atom->angle_per_atom) + error->one(FLERR,"Fix bond/react topology/atom exceed 
system topology/atom"); + delta_angle++; + } } } } @@ -3426,7 +3392,7 @@ void FixBondReact::update_everything() } // Dihedrals! first let's delete all dihedral info for landlocked atoms - if (force->dihedral && twomol->dihedralflag) { + if (force->dihedral) { int *num_dihedral = atom->num_dihedral; int **dihedral_type = atom->dihedral_type; tagint **dihedral_atom1 = atom->dihedral_atom1; @@ -3470,36 +3436,38 @@ void FixBondReact::update_everything() } } // now let's add new dihedral info - for (int j = 0; j < twomol->natoms; j++) { - int jj = equivalences[j][1][rxnID]-1; - if (atom->map(update_mega_glove[jj+1][i]) < nlocal && atom->map(update_mega_glove[jj+1][i]) >= 0) { - if (landlocked_atoms[j][rxnID] == 1) { - num_dihedral[atom->map(update_mega_glove[jj+1][i])] = twomol->num_dihedral[j]; - delta_dihed += twomol->num_dihedral[j]; - for (int p = 0; p < twomol->num_dihedral[j]; p++) { - dihedral_type[atom->map(update_mega_glove[jj+1][i])][p] = twomol->dihedral_type[j][p]; - dihedral_atom1[atom->map(update_mega_glove[jj+1][i])][p] = update_mega_glove[equivalences[twomol->dihedral_atom1[j][p]-1][1][rxnID]][i]; - dihedral_atom2[atom->map(update_mega_glove[jj+1][i])][p] = update_mega_glove[equivalences[twomol->dihedral_atom2[j][p]-1][1][rxnID]][i]; - dihedral_atom3[atom->map(update_mega_glove[jj+1][i])][p] = update_mega_glove[equivalences[twomol->dihedral_atom3[j][p]-1][1][rxnID]][i]; - dihedral_atom4[atom->map(update_mega_glove[jj+1][i])][p] = update_mega_glove[equivalences[twomol->dihedral_atom4[j][p]-1][1][rxnID]][i]; + if (twomol->dihedralflag) { + for (int j = 0; j < twomol->natoms; j++) { + int jj = equivalences[j][1][rxnID]-1; + if (atom->map(update_mega_glove[jj+1][i]) < nlocal && atom->map(update_mega_glove[jj+1][i]) >= 0) { + if (landlocked_atoms[j][rxnID] == 1) { + num_dihedral[atom->map(update_mega_glove[jj+1][i])] = twomol->num_dihedral[j]; + delta_dihed += twomol->num_dihedral[j]; + for (int p = 0; p < twomol->num_dihedral[j]; p++) { + 
dihedral_type[atom->map(update_mega_glove[jj+1][i])][p] = twomol->dihedral_type[j][p]; + dihedral_atom1[atom->map(update_mega_glove[jj+1][i])][p] = update_mega_glove[equivalences[twomol->dihedral_atom1[j][p]-1][1][rxnID]][i]; + dihedral_atom2[atom->map(update_mega_glove[jj+1][i])][p] = update_mega_glove[equivalences[twomol->dihedral_atom2[j][p]-1][1][rxnID]][i]; + dihedral_atom3[atom->map(update_mega_glove[jj+1][i])][p] = update_mega_glove[equivalences[twomol->dihedral_atom3[j][p]-1][1][rxnID]][i]; + dihedral_atom4[atom->map(update_mega_glove[jj+1][i])][p] = update_mega_glove[equivalences[twomol->dihedral_atom4[j][p]-1][1][rxnID]][i]; + } } - } - if (landlocked_atoms[j][rxnID] == 0) { - for (int p = 0; p < twomol->num_dihedral[j]; p++) { - if (landlocked_atoms[twomol->dihedral_atom1[j][p]-1][rxnID] == 1 || - landlocked_atoms[twomol->dihedral_atom2[j][p]-1][rxnID] == 1 || - landlocked_atoms[twomol->dihedral_atom3[j][p]-1][rxnID] == 1 || - landlocked_atoms[twomol->dihedral_atom4[j][p]-1][rxnID] == 1) { - insert_num = num_dihedral[atom->map(update_mega_glove[jj+1][i])]; - dihedral_type[atom->map(update_mega_glove[jj+1][i])][insert_num] = twomol->dihedral_type[j][p]; - dihedral_atom1[atom->map(update_mega_glove[jj+1][i])][insert_num] = update_mega_glove[equivalences[twomol->dihedral_atom1[j][p]-1][1][rxnID]][i]; - dihedral_atom2[atom->map(update_mega_glove[jj+1][i])][insert_num] = update_mega_glove[equivalences[twomol->dihedral_atom2[j][p]-1][1][rxnID]][i]; - dihedral_atom3[atom->map(update_mega_glove[jj+1][i])][insert_num] = update_mega_glove[equivalences[twomol->dihedral_atom3[j][p]-1][1][rxnID]][i]; - dihedral_atom4[atom->map(update_mega_glove[jj+1][i])][insert_num] = update_mega_glove[equivalences[twomol->dihedral_atom4[j][p]-1][1][rxnID]][i]; - num_dihedral[atom->map(update_mega_glove[jj+1][i])]++; - if (num_dihedral[atom->map(update_mega_glove[jj+1][i])] > atom->dihedral_per_atom) - error->one(FLERR,"Fix bond/react topology/atom exceed system topology/atom"); - 
delta_dihed++; + if (landlocked_atoms[j][rxnID] == 0) { + for (int p = 0; p < twomol->num_dihedral[j]; p++) { + if (landlocked_atoms[twomol->dihedral_atom1[j][p]-1][rxnID] == 1 || + landlocked_atoms[twomol->dihedral_atom2[j][p]-1][rxnID] == 1 || + landlocked_atoms[twomol->dihedral_atom3[j][p]-1][rxnID] == 1 || + landlocked_atoms[twomol->dihedral_atom4[j][p]-1][rxnID] == 1) { + insert_num = num_dihedral[atom->map(update_mega_glove[jj+1][i])]; + dihedral_type[atom->map(update_mega_glove[jj+1][i])][insert_num] = twomol->dihedral_type[j][p]; + dihedral_atom1[atom->map(update_mega_glove[jj+1][i])][insert_num] = update_mega_glove[equivalences[twomol->dihedral_atom1[j][p]-1][1][rxnID]][i]; + dihedral_atom2[atom->map(update_mega_glove[jj+1][i])][insert_num] = update_mega_glove[equivalences[twomol->dihedral_atom2[j][p]-1][1][rxnID]][i]; + dihedral_atom3[atom->map(update_mega_glove[jj+1][i])][insert_num] = update_mega_glove[equivalences[twomol->dihedral_atom3[j][p]-1][1][rxnID]][i]; + dihedral_atom4[atom->map(update_mega_glove[jj+1][i])][insert_num] = update_mega_glove[equivalences[twomol->dihedral_atom4[j][p]-1][1][rxnID]][i]; + num_dihedral[atom->map(update_mega_glove[jj+1][i])]++; + if (num_dihedral[atom->map(update_mega_glove[jj+1][i])] > atom->dihedral_per_atom) + error->one(FLERR,"Fix bond/react topology/atom exceed system topology/atom"); + delta_dihed++; + } } } } @@ -3509,7 +3477,7 @@ void FixBondReact::update_everything() } // finally IMPROPERS!!!! 
first let's delete all improper info for landlocked atoms - if (force->improper && twomol->improperflag) { + if (force->improper) { int *num_improper = atom->num_improper; int **improper_type = atom->improper_type; tagint **improper_atom1 = atom->improper_atom1; @@ -3553,36 +3521,38 @@ void FixBondReact::update_everything() } } // now let's add new improper info - for (int j = 0; j < twomol->natoms; j++) { - int jj = equivalences[j][1][rxnID]-1; - if (atom->map(update_mega_glove[jj+1][i]) < nlocal && atom->map(update_mega_glove[jj+1][i]) >= 0) { - if (landlocked_atoms[j][rxnID] == 1) { - num_improper[atom->map(update_mega_glove[jj+1][i])] = twomol->num_improper[j]; - delta_imprp += twomol->num_improper[j]; - for (int p = 0; p < twomol->num_improper[j]; p++) { - improper_type[atom->map(update_mega_glove[jj+1][i])][p] = twomol->improper_type[j][p]; - improper_atom1[atom->map(update_mega_glove[jj+1][i])][p] = update_mega_glove[equivalences[twomol->improper_atom1[j][p]-1][1][rxnID]][i]; - improper_atom2[atom->map(update_mega_glove[jj+1][i])][p] = update_mega_glove[equivalences[twomol->improper_atom2[j][p]-1][1][rxnID]][i]; - improper_atom3[atom->map(update_mega_glove[jj+1][i])][p] = update_mega_glove[equivalences[twomol->improper_atom3[j][p]-1][1][rxnID]][i]; - improper_atom4[atom->map(update_mega_glove[jj+1][i])][p] = update_mega_glove[equivalences[twomol->improper_atom4[j][p]-1][1][rxnID]][i]; + if (twomol->improperflag) { + for (int j = 0; j < twomol->natoms; j++) { + int jj = equivalences[j][1][rxnID]-1; + if (atom->map(update_mega_glove[jj+1][i]) < nlocal && atom->map(update_mega_glove[jj+1][i]) >= 0) { + if (landlocked_atoms[j][rxnID] == 1) { + num_improper[atom->map(update_mega_glove[jj+1][i])] = twomol->num_improper[j]; + delta_imprp += twomol->num_improper[j]; + for (int p = 0; p < twomol->num_improper[j]; p++) { + improper_type[atom->map(update_mega_glove[jj+1][i])][p] = twomol->improper_type[j][p]; + improper_atom1[atom->map(update_mega_glove[jj+1][i])][p] = 
update_mega_glove[equivalences[twomol->improper_atom1[j][p]-1][1][rxnID]][i]; + improper_atom2[atom->map(update_mega_glove[jj+1][i])][p] = update_mega_glove[equivalences[twomol->improper_atom2[j][p]-1][1][rxnID]][i]; + improper_atom3[atom->map(update_mega_glove[jj+1][i])][p] = update_mega_glove[equivalences[twomol->improper_atom3[j][p]-1][1][rxnID]][i]; + improper_atom4[atom->map(update_mega_glove[jj+1][i])][p] = update_mega_glove[equivalences[twomol->improper_atom4[j][p]-1][1][rxnID]][i]; + } } - } - if (landlocked_atoms[j][rxnID] == 0) { - for (int p = 0; p < twomol->num_improper[j]; p++) { - if (landlocked_atoms[twomol->improper_atom1[j][p]-1][rxnID] == 1 || - landlocked_atoms[twomol->improper_atom2[j][p]-1][rxnID] == 1 || - landlocked_atoms[twomol->improper_atom3[j][p]-1][rxnID] == 1 || - landlocked_atoms[twomol->improper_atom4[j][p]-1][rxnID] == 1) { - insert_num = num_improper[atom->map(update_mega_glove[jj+1][i])]; - improper_type[atom->map(update_mega_glove[jj+1][i])][insert_num] = twomol->improper_type[j][p]; - improper_atom1[atom->map(update_mega_glove[jj+1][i])][insert_num] = update_mega_glove[equivalences[twomol->improper_atom1[j][p]-1][1][rxnID]][i]; - improper_atom2[atom->map(update_mega_glove[jj+1][i])][insert_num] = update_mega_glove[equivalences[twomol->improper_atom2[j][p]-1][1][rxnID]][i]; - improper_atom3[atom->map(update_mega_glove[jj+1][i])][insert_num] = update_mega_glove[equivalences[twomol->improper_atom3[j][p]-1][1][rxnID]][i]; - improper_atom4[atom->map(update_mega_glove[jj+1][i])][insert_num] = update_mega_glove[equivalences[twomol->improper_atom4[j][p]-1][1][rxnID]][i]; - num_improper[atom->map(update_mega_glove[jj+1][i])]++; - if (num_improper[atom->map(update_mega_glove[jj+1][i])] > atom->improper_per_atom) - error->one(FLERR,"Fix bond/react topology/atom exceed system topology/atom"); - delta_imprp++; + if (landlocked_atoms[j][rxnID] == 0) { + for (int p = 0; p < twomol->num_improper[j]; p++) { + if 
(landlocked_atoms[twomol->improper_atom1[j][p]-1][rxnID] == 1 || + landlocked_atoms[twomol->improper_atom2[j][p]-1][rxnID] == 1 || + landlocked_atoms[twomol->improper_atom3[j][p]-1][rxnID] == 1 || + landlocked_atoms[twomol->improper_atom4[j][p]-1][rxnID] == 1) { + insert_num = num_improper[atom->map(update_mega_glove[jj+1][i])]; + improper_type[atom->map(update_mega_glove[jj+1][i])][insert_num] = twomol->improper_type[j][p]; + improper_atom1[atom->map(update_mega_glove[jj+1][i])][insert_num] = update_mega_glove[equivalences[twomol->improper_atom1[j][p]-1][1][rxnID]][i]; + improper_atom2[atom->map(update_mega_glove[jj+1][i])][insert_num] = update_mega_glove[equivalences[twomol->improper_atom2[j][p]-1][1][rxnID]][i]; + improper_atom3[atom->map(update_mega_glove[jj+1][i])][insert_num] = update_mega_glove[equivalences[twomol->improper_atom3[j][p]-1][1][rxnID]][i]; + improper_atom4[atom->map(update_mega_glove[jj+1][i])][insert_num] = update_mega_glove[equivalences[twomol->improper_atom4[j][p]-1][1][rxnID]][i]; + num_improper[atom->map(update_mega_glove[jj+1][i])]++; + if (num_improper[atom->map(update_mega_glove[jj+1][i])] > atom->improper_per_atom) + error->one(FLERR,"Fix bond/react topology/atom exceed system topology/atom"); + delta_imprp++; + } } } } @@ -3895,7 +3865,8 @@ int FixBondReact::insert_atoms(tagint **my_update_mega_glove, int iupdate) // guess a somewhat reasonable initial velocity based on reaction site // further control is possible using bond_react_MASTER_group // compute |velocity| corresponding to a given temperature t, using specific atom's mass - double vtnorm = sqrt(t / (force->mvv2e / (dimension * force->boltz)) / atom->mass[twomol->type[m]]); + double mymass = atom->rmass ? 
atom->rmass[n] : atom->mass[twomol->type[m]]; + double vtnorm = sqrt(t / (force->mvv2e / (dimension * force->boltz)) / mymass); v[n][0] = random[rxnID]->uniform(); v[n][1] = random[rxnID]->uniform(); v[n][2] = random[rxnID]->uniform(); diff --git a/src/REACTION/fix_bond_react.h b/src/REACTION/fix_bond_react.h index 534261e11d..8c9fc9dce4 100644 --- a/src/REACTION/fix_bond_react.h +++ b/src/REACTION/fix_bond_react.h @@ -12,7 +12,7 @@ ------------------------------------------------------------------------- */ /* ---------------------------------------------------------------------- - Contributing Author: Jacob Gissinger (jacob.r.gissinger@gmail.com) + Contributing Author: Jacob Gissinger (jgissing@stevens.edu) ------------------------------------------------------------------------- */ #ifdef FIX_CLASS @@ -139,7 +139,7 @@ class FixBondReact : public Fix { int avail_guesses; // num of restore points available int *guess_branch; // used when there is more than two choices when guessing int **restore_pt; // contains info about restore points - tagint **restore; // contaings info about restore points + tagint **restore; // contains info about restore points int *pioneer_count; // counts pioneers int **edge; // atoms in molecule templates with incorrect valences diff --git a/src/RIGID/fix_rigid.cpp b/src/RIGID/fix_rigid.cpp index 628abb240e..bd3c53e3ec 100644 --- a/src/RIGID/fix_rigid.cpp +++ b/src/RIGID/fix_rigid.cpp @@ -1247,7 +1247,7 @@ void FixRigid::enforce2d() return total count of DOF ------------------------------------------------------------------------- */ -int FixRigid::dof(int tgroup) +bigint FixRigid::dof(int tgroup) { // cannot count DOF correctly unless setup_bodies_static() has been called @@ -1306,7 +1306,7 @@ int FixRigid::dof(int tgroup) // 3d body with any finite-size M should have 6 dof, remove (3N+6M) - 6 // 2d body with any finite-size M should have 3 dof, remove (2N+3M) - 3 - int n = 0; + bigint n = 0; nlinear = 0; if (domain->dimension == 3) { 
for (int ibody = 0; ibody < nbody; ibody++) diff --git a/src/RIGID/fix_rigid.h b/src/RIGID/fix_rigid.h index 361ddd2720..c2f04ecf1a 100644 --- a/src/RIGID/fix_rigid.h +++ b/src/RIGID/fix_rigid.h @@ -48,7 +48,7 @@ class FixRigid : public Fix { void setup_pre_neighbor() override; void pre_neighbor() override; - int dof(int) override; + bigint dof(int) override; void deform(int) override; void reset_dt() override; void zero_momentum() override; diff --git a/src/RIGID/fix_rigid_small.cpp b/src/RIGID/fix_rigid_small.cpp index bd49834f15..5905e44595 100644 --- a/src/RIGID/fix_rigid_small.cpp +++ b/src/RIGID/fix_rigid_small.cpp @@ -1123,7 +1123,7 @@ void FixRigidSmall::enforce2d() return total count of DOF ------------------------------------------------------------------------- */ -int FixRigidSmall::dof(int tgroup) +bigint FixRigidSmall::dof(int tgroup) { int i,j; @@ -1195,7 +1195,7 @@ int FixRigidSmall::dof(int tgroup) double *inertia; - int n = 0; + bigint n = 0; nlinear = 0; if (domain->dimension == 3) { for (int ibody = 0; ibody < nlocal_body; ibody++) { @@ -1216,8 +1216,8 @@ int FixRigidSmall::dof(int tgroup) memory->destroy(counts); - int nall; - MPI_Allreduce(&n,&nall,1,MPI_INT,MPI_SUM,world); + bigint nall; + MPI_Allreduce(&n,&nall,1,MPI_LMP_BIGINT,MPI_SUM,world); return nall; } diff --git a/src/RIGID/fix_rigid_small.h b/src/RIGID/fix_rigid_small.h index 0070d976df..0508063f05 100644 --- a/src/RIGID/fix_rigid_small.h +++ b/src/RIGID/fix_rigid_small.h @@ -54,7 +54,7 @@ class FixRigidSmall : public Fix { void setup_pre_neighbor() override; void pre_neighbor() override; - int dof(int) override; + bigint dof(int) override; void deform(int) override; void reset_dt() override; void zero_momentum() override; diff --git a/src/RIGID/fix_shake.cpp b/src/RIGID/fix_shake.cpp index b2c65220bc..15bd5d207f 100644 --- a/src/RIGID/fix_shake.cpp +++ b/src/RIGID/fix_shake.cpp @@ -207,8 +207,8 @@ FixShake::FixShake(LAMMPS *lmp, int narg, char **arg) : if (output_every) { int nb = 
atom->nbondtypes + 1; - b_count = new int[nb]; - b_count_all = new int[nb]; + b_count = new bigint[nb]; + b_count_all = new bigint[nb]; b_ave = new double[nb]; b_ave_all = new double[nb]; b_max = new double[nb]; @@ -217,8 +217,8 @@ FixShake::FixShake(LAMMPS *lmp, int narg, char **arg) : b_min_all = new double[nb]; int na = atom->nangletypes + 1; - a_count = new int[na]; - a_count_all = new int[na]; + a_count = new bigint[na]; + a_count_all = new bigint[na]; a_ave = new double[na]; a_ave_all = new double[na]; a_max = new double[na]; @@ -755,7 +755,7 @@ void FixShake::min_post_force(int vflag) count # of degrees-of-freedom removed by SHAKE for atoms in igroup ------------------------------------------------------------------------- */ -int FixShake::dof(int igroup) +bigint FixShake::dof(int igroup) { int groupbit = group->bitmask[igroup]; @@ -766,7 +766,7 @@ int FixShake::dof(int igroup) // count dof in a cluster if and only if // the central atom is in group and atom i is the central atom - int n = 0; + bigint n = 0; for (int i = 0; i < nlocal; i++) { if (!(mask[i] & groupbit)) continue; if (shake_flag[i] == 0) continue; @@ -777,8 +777,8 @@ int FixShake::dof(int igroup) else if (shake_flag[i] == 4) n += 3; } - int nall; - MPI_Allreduce(&n,&nall,1,MPI_INT,MPI_SUM,world); + bigint nall; + MPI_Allreduce(&n,&nall,1,MPI_LMP_BIGINT,MPI_SUM,world); return nall; } @@ -1098,7 +1098,7 @@ void FixShake::find_clusters() // print info on SHAKE clusters // ----------------------------------------------------- - int count1,count2,count3,count4; + bigint count1,count2,count3,count4; count1 = count2 = count3 = count4 = 0; for (i = 0; i < nlocal; i++) { if (shake_flag[i] == 1) count1++; @@ -1107,15 +1107,15 @@ void FixShake::find_clusters() else if (shake_flag[i] == 4) count4++; } - int tmp; + bigint tmp; tmp = count1; - MPI_Allreduce(&tmp,&count1,1,MPI_INT,MPI_SUM,world); + MPI_Allreduce(&tmp,&count1,1,MPI_LMP_BIGINT,MPI_SUM,world); tmp = count2; - 
MPI_Allreduce(&tmp,&count2,1,MPI_INT,MPI_SUM,world); + MPI_Allreduce(&tmp,&count2,1,MPI_LMP_BIGINT,MPI_SUM,world); tmp = count3; - MPI_Allreduce(&tmp,&count3,1,MPI_INT,MPI_SUM,world); + MPI_Allreduce(&tmp,&count3,1,MPI_LMP_BIGINT,MPI_SUM,world); tmp = count4; - MPI_Allreduce(&tmp,&count4,1,MPI_INT,MPI_SUM,world); + MPI_Allreduce(&tmp,&count4,1,MPI_LMP_BIGINT,MPI_SUM,world); if (comm->me == 0) { utils::logmesg(lmp,"{:>8} = # of size 2 clusters\n" @@ -2682,12 +2682,12 @@ void FixShake::stats() // sum across all procs - MPI_Allreduce(b_count,b_count_all,nb,MPI_INT,MPI_SUM,world); + MPI_Allreduce(b_count,b_count_all,nb,MPI_LMP_BIGINT,MPI_SUM,world); MPI_Allreduce(b_ave,b_ave_all,nb,MPI_DOUBLE,MPI_SUM,world); MPI_Allreduce(b_max,b_max_all,nb,MPI_DOUBLE,MPI_MAX,world); MPI_Allreduce(b_min,b_min_all,nb,MPI_DOUBLE,MPI_MIN,world); - MPI_Allreduce(a_count,a_count_all,na,MPI_INT,MPI_SUM,world); + MPI_Allreduce(a_count,a_count_all,na,MPI_LMP_BIGINT,MPI_SUM,world); MPI_Allreduce(a_ave,a_ave_all,na,MPI_DOUBLE,MPI_SUM,world); MPI_Allreduce(a_max,a_max_all,na,MPI_DOUBLE,MPI_MAX,world); MPI_Allreduce(a_min,a_min_all,na,MPI_DOUBLE,MPI_MIN,world); diff --git a/src/RIGID/fix_shake.h b/src/RIGID/fix_shake.h index 3b04560f09..d02fdd784a 100644 --- a/src/RIGID/fix_shake.h +++ b/src/RIGID/fix_shake.h @@ -59,7 +59,7 @@ class FixShake : public Fix { virtual void correct_coordinates(int vflag); virtual void correct_velocities(); - int dof(int) override; + bigint dof(int) override; void reset_dt() override; void *extract(const char *, int &) override; double compute_scalar() override; @@ -117,10 +117,10 @@ class FixShake : public Fix { int nlist, maxlist; // size and max-size of list // stat quantities - int *b_count, *b_count_all; // counts for each bond type, atoms in bond cluster + bigint *b_count, *b_count_all; // counts for each bond type, atoms in bond cluster double *b_ave, *b_max, *b_min; // ave/max/min dist for each bond type double *b_ave_all, *b_max_all, *b_min_all; // MPI summing 
arrays - int *a_count, *a_count_all; // ditto for angle types + bigint *a_count, *a_count_all; // ditto for angle types double *a_ave, *a_max, *a_min; double *a_ave_all, *a_max_all, *a_min_all; diff --git a/src/atom.cpp b/src/atom.cpp index b604c54e6b..c08df16614 100644 --- a/src/atom.cpp +++ b/src/atom.cpp @@ -26,6 +26,7 @@ #include "input.h" #include "label_map.h" #include "math_const.h" +#include "math_extra.h" #include "memory.h" #include "modify.h" #include "molecule.h" @@ -2112,6 +2113,15 @@ std::vectorAtom::get_molecule_by_id(const std::string &id) void Atom::add_molecule_atom(Molecule *onemol, int iatom, int ilocal, tagint offset) { if (onemol->qflag && q_flag) q[ilocal] = onemol->q[iatom]; + if (onemol->muflag && mu_flag) { + double r[3], rotmat[3][3]; + MathExtra::quat_to_mat(onemol->quat_external, rotmat); + MathExtra::matvec(rotmat, onemol->mu[iatom], r); + mu[ilocal][0] = r[0]; + mu[ilocal][1] = r[1]; + mu[ilocal][2] = r[2]; + mu[ilocal][3] = sqrt(r[0] * r[0] + r[1] * r[1] + r[2] * r[2]); + } if (onemol->radiusflag && radius_flag) radius[ilocal] = onemol->radius[iatom]; if (onemol->rmassflag && rmass_flag) rmass[ilocal] = onemol->rmass[iatom]; else if (rmass_flag) diff --git a/src/compute.cpp b/src/compute.cpp index 2bd1544fd7..d47d1d5292 100644 --- a/src/compute.cpp +++ b/src/compute.cpp @@ -83,7 +83,7 @@ Compute::Compute(LAMMPS *lmp, int narg, char **arg) : extra_dof = domain->dimension; dynamic_user = 0; - fix_dof = 0; + fix_dof = 0.0; // setup list of timesteps diff --git a/src/compute.h b/src/compute.h index 8ae01a4469..6956c3ae99 100644 --- a/src/compute.h +++ b/src/compute.h @@ -178,7 +178,7 @@ class Compute : protected Pointers { double natoms_temp; // # of atoms used for temperature calculation double extra_dof; // extra DOF for temperature computes - int fix_dof; // DOF due to fixes + double fix_dof; // DOF due to fixes int dynamic; // recount atoms for temperature computes int dynamic_user; // user request for temp compute to be dynamic diff 
--git a/src/compute_pair.cpp b/src/compute_pair.cpp index e789adbc89..1cb22a006f 100644 --- a/src/compute_pair.cpp +++ b/src/compute_pair.cpp @@ -75,7 +75,7 @@ ComputePair::ComputePair(LAMMPS *lmp, int narg, char **arg) : pair = force->pair_match(pstyle, 1, nsub); } - if (!pair) error->all(FLERR, "Unrecognized pair style {} in compute pair command", pstyle); + if (!pair) error->all(FLERR, "Unused pair style {} in compute pair command", pstyle); npair = pair->nextra; if (npair) { diff --git a/src/displace_atoms.cpp b/src/displace_atoms.cpp index fa333f1bc2..5ecf5a2c9e 100644 --- a/src/displace_atoms.cpp +++ b/src/displace_atoms.cpp @@ -160,7 +160,7 @@ void DisplaceAtoms::command(int narg, char **arg) int *mask = atom->mask; int nlocal = atom->nlocal; - double fraction,dramp; + double fraction, dramp; for (i = 0; i < nlocal; i++) { if (mask[i] & groupbit) { @@ -255,11 +255,12 @@ void DisplaceAtoms::command(int narg, char **arg) int line_flag = atom->line_flag; int tri_flag = atom->tri_flag; int body_flag = atom->body_flag; + int quat_atom_flag = atom->quat_flag; int theta_flag = 0; int quat_flag = 0; if (line_flag) theta_flag = 1; - if (ellipsoid_flag || tri_flag || body_flag) quat_flag = 1; + if (ellipsoid_flag || tri_flag || body_flag || quat_atom_flag) quat_flag = 1; // AtomVec pointers to retrieve per-atom storage of extra quantities @@ -269,6 +270,7 @@ void DisplaceAtoms::command(int narg, char **arg) auto avec_body = dynamic_cast(atom->style_match("body")); double **x = atom->x; + double **quat_atom = atom->quat; int *ellipsoid = atom->ellipsoid; int *line = atom->line; int *tri = atom->tri; @@ -313,7 +315,7 @@ void DisplaceAtoms::command(int narg, char **arg) // quats for ellipsoids, tris, and bodies - if (quat_flag) { + if (quat_flag && !quat_atom_flag) { quat = nullptr; if (ellipsoid_flag && ellipsoid[i] >= 0) quat = avec_ellipsoid->bonus[ellipsoid[i]].quat; @@ -322,12 +324,18 @@ void DisplaceAtoms::command(int narg, char **arg) else if (body_flag && body[i] 
>= 0) quat = avec_body->bonus[body[i]].quat; if (quat) { - MathExtra::quatquat(qrotate,quat,qnew); + MathExtra::quatquat(qrotate, quat, qnew); quat[0] = qnew[0]; quat[1] = qnew[1]; quat[2] = qnew[2]; quat[3] = qnew[3]; } + } else if (quat_atom_flag) { + MathExtra::quatquat(qrotate, quat_atom[i], qnew); + quat_atom[i][0] = qnew[0]; + quat_atom[i][1] = qnew[1]; + quat_atom[i][2] = qnew[2]; + quat_atom[i][3] = qnew[3]; } } } diff --git a/src/fix.h b/src/fix.h index 9b595f0c60..ca0a1ef84b 100644 --- a/src/fix.h +++ b/src/fix.h @@ -99,8 +99,8 @@ class Fix : protected Pointers { int size_local_cols; // 0 = vector, N = columns in local array int local_freq; // frequency local data is available at - int pergrid_flag; // 0/1 if per-grid data is stored - int pergrid_freq; // frequency per-grid data is available at + int pergrid_flag; // 0/1 if per-grid data is stored + int pergrid_freq; // frequency per-grid data is available at int extscalar; // 0/1 if global scalar is intensive/extensive int extvector; // 0/1/-1 if global vector is all int/ext/extlist @@ -129,11 +129,11 @@ class Fix : protected Pointers { // KOKKOS flags and variables - int kokkosable; // 1 if Kokkos fix - int forward_comm_device; // 1 if forward comm on Device - int exchange_comm_device; // 1 if exchange comm on Device - int fuse_integrate_flag; // 1 if can fuse initial integrate with final integrate - int sort_device; // 1 if sort on Device + int kokkosable; // 1 if Kokkos fix + int forward_comm_device; // 1 if forward comm on Device + int exchange_comm_device; // 1 if exchange comm on Device + int fuse_integrate_flag; // 1 if can fuse initial integrate with final integrate + int sort_device; // 1 if sort on Device ExecutionSpace execution_space; unsigned int datamask_read, datamask_modify; @@ -223,7 +223,7 @@ class Fix : protected Pointers { virtual void unpack_reverse_grid(int, void *, int, int *){}; virtual void pack_remap_grid(int, void *, int, int *){}; virtual void unpack_remap_grid(int, void *, 
int, int *){}; - virtual int unpack_read_grid(int, char *) {return 0;}; + virtual int unpack_read_grid(int, char *) { return 0; }; virtual void pack_write_grid(int, void *){}; virtual void unpack_write_grid(int, void *, int *){}; @@ -236,7 +236,7 @@ class Fix : protected Pointers { virtual double compute_vector(int) { return 0.0; } virtual double compute_array(int, int) { return 0.0; } - virtual int dof(int) { return 0; } + virtual bigint dof(int) { return 0; } virtual void deform(int) {} virtual void reset_target(double) {} virtual void reset_dt() {} diff --git a/src/fix_move.cpp b/src/fix_move.cpp index 36bba410fc..53009495b1 100644 --- a/src/fix_move.cpp +++ b/src/fix_move.cpp @@ -276,10 +276,11 @@ FixMove::FixMove(LAMMPS *lmp, int narg, char **arg) : line_flag = atom->line_flag; tri_flag = atom->tri_flag; body_flag = atom->body_flag; + quat_atom_flag = atom->quat_flag; theta_flag = quat_flag = 0; if (line_flag) theta_flag = 1; - if (ellipsoid_flag || tri_flag || body_flag) quat_flag = 1; + if (ellipsoid_flag || tri_flag || body_flag || quat_atom_flag) quat_flag = 1; extra_flag = 0; if (omega_flag || angmom_flag || theta_flag || quat_flag) extra_flag = 1; @@ -329,7 +330,7 @@ FixMove::FixMove(LAMMPS *lmp, int narg, char **arg) : } } - if (quat_flag) { + if (quat_flag && !quat_atom_flag) { double *quat; for (int i = 0; i < nlocal; i++) { quat = nullptr; @@ -349,6 +350,16 @@ FixMove::FixMove(LAMMPS *lmp, int narg, char **arg) : } else qoriginal[i][0] = qoriginal[i][1] = qoriginal[i][2] = qoriginal[i][3] = 0.0; } + } else if (quat_atom_flag) { + double **quat_atom = atom->quat; + for (int i = 0; i < nlocal; i++) { + if (mask[i] & groupbit) { + qoriginal[i][0] = quat_atom[i][0]; + qoriginal[i][1] = quat_atom[i][1]; + qoriginal[i][2] = quat_atom[i][2]; + qoriginal[i][3] = quat_atom[i][3]; + } + } } // nrestart = size of per-atom restart data @@ -521,6 +532,7 @@ void FixMove::initial_integrate(int /*vflag*/) double *radius = atom->radius; double *rmass = atom->rmass; 
double *mass = atom->mass; + double **quat_atom = atom->quat; int *type = atom->type; int *ellipsoid = atom->ellipsoid; int *line = atom->line; @@ -749,9 +761,9 @@ void FixMove::initial_integrate(int /*vflag*/) avec_line->bonus[atom->line[i]].theta = theta_new; } - // quats for ellipsoids, tris, and bodies + // quats for ellipsoids, tris, bodies, and bpm/sphere - if (quat_flag) { + if (quat_flag && !quat_atom_flag) { quat = nullptr; if (ellipsoid_flag && ellipsoid[i] >= 0) quat = avec_ellipsoid->bonus[ellipsoid[i]].quat; @@ -760,6 +772,8 @@ void FixMove::initial_integrate(int /*vflag*/) else if (body_flag && body[i] >= 0) quat = avec_body->bonus[body[i]].quat; if (quat) MathExtra::quatquat(qrotate, qoriginal[i], quat); + } else if (quat_atom_flag) { + MathExtra::quatquat(qrotate, qoriginal[i], quat_atom[i]); } } @@ -880,9 +894,9 @@ void FixMove::initial_integrate(int /*vflag*/) avec_line->bonus[atom->line[i]].theta = theta_new; } - // quats for ellipsoids, tris, and bodies + // quats for ellipsoids, tris, bodies, and bpm/sphere - if (quat_flag) { + if (quat_flag && !quat_atom_flag) { quat = nullptr; if (ellipsoid_flag && ellipsoid[i] >= 0) quat = avec_ellipsoid->bonus[ellipsoid[i]].quat; @@ -891,6 +905,8 @@ void FixMove::initial_integrate(int /*vflag*/) else if (body_flag && body[i] >= 0) quat = avec_body->bonus[body[i]].quat; if (quat) MathExtra::quatquat(qrotate, qoriginal[i], quat); + } else if (quat_atom_flag) { + MathExtra::quatquat(qrotate, qoriginal[i], quat_atom[i]); } } @@ -1341,9 +1357,9 @@ void FixMove::set_arrays(int i) toriginal[i] = theta - 0.0; // NOTE: edit this line } - // quats for ellipsoids, tris, and bodies + // quats for ellipsoids, tris, bodies, and bpm/sphere - if (quat_flag) { + if (quat_flag && !quat_atom_flag) { quat = nullptr; if (ellipsoid_flag && ellipsoid[i] >= 0) quat = avec_ellipsoid->bonus[ellipsoid[i]].quat; @@ -1354,6 +1370,12 @@ void FixMove::set_arrays(int i) if (quat) { // qoriginal = f(quat,-delta); // NOTE: edit this line } +
} else if (quat_atom_flag) { + // double **quat_atom = atom->quat; + // qoriginal[0] = quat_atom[i][0]; // NOTE: edit this line + // qoriginal[1] = quat_atom[i][1]; // NOTE: edit this line + // qoriginal[2] = quat_atom[i][2]; // NOTE: edit this line + // qoriginal[3] = quat_atom[i][3]; // NOTE: edit this line } } xoriginal[i][0] -= vx * delta; @@ -1400,7 +1422,7 @@ void FixMove::set_arrays(int i) // quats for ellipsoids, tris, and bodies - if (quat_flag) { + if (quat_flag && !quat_atom_flag) { quat = nullptr; if (ellipsoid_flag && ellipsoid[i] >= 0) quat = avec_ellipsoid->bonus[ellipsoid[i]].quat; @@ -1411,6 +1433,12 @@ void FixMove::set_arrays(int i) if (quat) { // qoriginal = f(quat,-delta); // NOTE: edit this line } + } else if (quat_atom_flag) { + // double **quat_atom = atom->quat; + // qoriginal[0] = quat_atom[i][0]; // NOTE: edit this line + // qoriginal[1] = quat_atom[i][1]; // NOTE: edit this line + // qoriginal[2] = quat_atom[i][2]; // NOTE: edit this line + // qoriginal[3] = quat_atom[i][3]; // NOTE: edit this line } } } diff --git a/src/fix_move.h b/src/fix_move.h index e3c018f54d..244a9d704a 100644 --- a/src/fix_move.h +++ b/src/fix_move.h @@ -61,7 +61,7 @@ class FixMove : public Fix { int xvar, yvar, zvar, vxvar, vyvar, vzvar; int xvarstyle, yvarstyle, zvarstyle, vxvarstyle, vyvarstyle, vzvarstyle; int extra_flag, omega_flag, angmom_flag; - int radius_flag, ellipsoid_flag, line_flag, tri_flag, body_flag; + int radius_flag, ellipsoid_flag, line_flag, tri_flag, body_flag, quat_atom_flag; int theta_flag, quat_flag; int nlevels_respa, nrestart; int time_origin; diff --git a/src/fmt/args.h b/src/fmt/args.h index 2d684e7cc1..b77a2d0661 100644 --- a/src/fmt/args.h +++ b/src/fmt/args.h @@ -12,7 +12,7 @@ #include // std::unique_ptr #include -#include "core.h" +#include "format.h" // std_string_view FMT_BEGIN_NAMESPACE @@ -22,8 +22,9 @@ template struct is_reference_wrapper : std::false_type {}; template struct is_reference_wrapper> : std::true_type {}; 
-template const T& unwrap(const T& v) { return v; } -template const T& unwrap(const std::reference_wrapper& v) { +template auto unwrap(const T& v) -> const T& { return v; } +template +auto unwrap(const std::reference_wrapper& v) -> const T& { return static_cast(v); } @@ -50,7 +51,7 @@ class dynamic_arg_list { std::unique_ptr> head_; public: - template const T& push(const Arg& arg) { + template auto push(const Arg& arg) -> const T& { auto new_node = std::unique_ptr>(new typed_node(arg)); auto& value = new_node->value; new_node->next = std::move(head_); @@ -110,14 +111,14 @@ class dynamic_format_arg_store friend class basic_format_args; - unsigned long long get_types() const { + auto get_types() const -> unsigned long long { return detail::is_unpacked_bit | data_.size() | (named_info_.empty() ? 0ULL : static_cast(detail::has_named_args_bit)); } - const basic_format_arg* data() const { + auto data() const -> const basic_format_arg* { return named_info_.empty() ? data_.data() : data_.data() + 1; } diff --git a/src/fmt/chrono.h b/src/fmt/chrono.h index ff3e1445b9..9d54574e16 100644 --- a/src/fmt/chrono.h +++ b/src/fmt/chrono.h @@ -18,7 +18,7 @@ #include #include -#include "format.h" +#include "ostream.h" // formatbuf FMT_BEGIN_NAMESPACE @@ -72,7 +72,8 @@ template ::value && std::numeric_limits::is_signed == std::numeric_limits::is_signed)> -FMT_CONSTEXPR To lossless_integral_conversion(const From from, int& ec) { +FMT_CONSTEXPR auto lossless_integral_conversion(const From from, int& ec) + -> To { ec = 0; using F = std::numeric_limits; using T = std::numeric_limits; @@ -101,7 +102,8 @@ template ::value && std::numeric_limits::is_signed != std::numeric_limits::is_signed)> -FMT_CONSTEXPR To lossless_integral_conversion(const From from, int& ec) { +FMT_CONSTEXPR auto lossless_integral_conversion(const From from, int& ec) + -> To { ec = 0; using F = std::numeric_limits; using T = std::numeric_limits; @@ -133,7 +135,8 @@ FMT_CONSTEXPR To lossless_integral_conversion(const 
From from, int& ec) { template ::value)> -FMT_CONSTEXPR To lossless_integral_conversion(const From from, int& ec) { +FMT_CONSTEXPR auto lossless_integral_conversion(const From from, int& ec) + -> To { ec = 0; return from; } // function @@ -154,7 +157,7 @@ FMT_CONSTEXPR To lossless_integral_conversion(const From from, int& ec) { // clang-format on template ::value)> -FMT_CONSTEXPR To safe_float_conversion(const From from, int& ec) { +FMT_CONSTEXPR auto safe_float_conversion(const From from, int& ec) -> To { ec = 0; using T = std::numeric_limits; static_assert(std::is_floating_point::value, "From must be floating"); @@ -176,7 +179,7 @@ FMT_CONSTEXPR To safe_float_conversion(const From from, int& ec) { template ::value)> -FMT_CONSTEXPR To safe_float_conversion(const From from, int& ec) { +FMT_CONSTEXPR auto safe_float_conversion(const From from, int& ec) -> To { ec = 0; static_assert(std::is_floating_point::value, "From must be floating"); return from; @@ -188,8 +191,8 @@ FMT_CONSTEXPR To safe_float_conversion(const From from, int& ec) { template ::value), FMT_ENABLE_IF(std::is_integral::value)> -To safe_duration_cast(std::chrono::duration from, - int& ec) { +auto safe_duration_cast(std::chrono::duration from, + int& ec) -> To { using From = std::chrono::duration; ec = 0; // the basic idea is that we need to convert from count() in the from type @@ -240,8 +243,8 @@ To safe_duration_cast(std::chrono::duration from, template ::value), FMT_ENABLE_IF(std::is_floating_point::value)> -To safe_duration_cast(std::chrono::duration from, - int& ec) { +auto safe_duration_cast(std::chrono::duration from, + int& ec) -> To { using From = std::chrono::duration; ec = 0; if (std::isnan(from.count())) { @@ -321,12 +324,12 @@ To safe_duration_cast(std::chrono::duration from, namespace detail { template struct null {}; -inline null<> localtime_r FMT_NOMACRO(...) { return null<>(); } -inline null<> localtime_s(...) { return null<>(); } -inline null<> gmtime_r(...) 
{ return null<>(); } -inline null<> gmtime_s(...) { return null<>(); } +inline auto localtime_r FMT_NOMACRO(...) -> null<> { return null<>(); } +inline auto localtime_s(...) -> null<> { return null<>(); } +inline auto gmtime_r(...) -> null<> { return null<>(); } +inline auto gmtime_s(...) -> null<> { return null<>(); } -inline const std::locale& get_classic_locale() { +inline auto get_classic_locale() -> const std::locale& { static const auto& locale = std::locale::classic(); return locale; } @@ -336,8 +339,6 @@ template struct codecvt_result { CodeUnit buf[max_size]; CodeUnit* end; }; -template -constexpr const size_t codecvt_result::max_size; template void write_codecvt(codecvt_result& out, string_view in_buf, @@ -408,8 +409,7 @@ inline void do_write(buffer& buf, const std::tm& time, auto&& format_buf = formatbuf>(buf); auto&& os = std::basic_ostream(&format_buf); os.imbue(loc); - using iterator = std::ostreambuf_iterator; - const auto& facet = std::use_facet>(loc); + const auto& facet = std::use_facet>(loc); auto end = facet.put(os, os, Char(' '), &time, format, modifier); if (end.failed()) FMT_THROW(format_error("failed to format time")); } @@ -432,6 +432,51 @@ auto write(OutputIt out, const std::tm& time, const std::locale& loc, return write_encoded_tm_str(out, string_view(buf.data(), buf.size()), loc); } +template +struct is_same_arithmetic_type + : public std::integral_constant::value && + std::is_integral::value) || + (std::is_floating_point::value && + std::is_floating_point::value)> { +}; + +template < + typename To, typename FromRep, typename FromPeriod, + FMT_ENABLE_IF(is_same_arithmetic_type::value)> +auto fmt_duration_cast(std::chrono::duration from) -> To { +#if FMT_SAFE_DURATION_CAST + // Throwing version of safe_duration_cast is only available for + // integer to integer or float to float casts. 
+ int ec; + To to = safe_duration_cast::safe_duration_cast(from, ec); + if (ec) FMT_THROW(format_error("cannot format duration")); + return to; +#else + // Standard duration cast, may overflow. + return std::chrono::duration_cast(from); +#endif +} + +template < + typename To, typename FromRep, typename FromPeriod, + FMT_ENABLE_IF(!is_same_arithmetic_type::value)> +auto fmt_duration_cast(std::chrono::duration from) -> To { + // Mixed integer <-> float cast is not supported by safe_duration_cast. + return std::chrono::duration_cast(from); +} + +template +auto to_time_t( + std::chrono::time_point time_point) + -> std::time_t { + // Cannot use std::chrono::system_clock::to_time_t since this would first + // require a cast to std::chrono::system_clock::time_point, which could + // overflow. + return fmt_duration_cast>( + time_point.time_since_epoch()) + .count(); +} } // namespace detail FMT_BEGIN_EXPORT @@ -441,29 +486,29 @@ FMT_BEGIN_EXPORT expressed in local time. Unlike ``std::localtime``, this function is thread-safe on most platforms. 
*/ -inline std::tm localtime(std::time_t time) { +inline auto localtime(std::time_t time) -> std::tm { struct dispatcher { std::time_t time_; std::tm tm_; dispatcher(std::time_t t) : time_(t) {} - bool run() { + auto run() -> bool { using namespace fmt::detail; return handle(localtime_r(&time_, &tm_)); } - bool handle(std::tm* tm) { return tm != nullptr; } + auto handle(std::tm* tm) -> bool { return tm != nullptr; } - bool handle(detail::null<>) { + auto handle(detail::null<>) -> bool { using namespace fmt::detail; return fallback(localtime_s(&tm_, &time_)); } - bool fallback(int res) { return res == 0; } + auto fallback(int res) -> bool { return res == 0; } #if !FMT_MSC_VERSION - bool fallback(detail::null<>) { + auto fallback(detail::null<>) -> bool { using namespace fmt::detail; std::tm* tm = std::localtime(&time_); if (tm) tm_ = *tm; @@ -480,8 +525,8 @@ inline std::tm localtime(std::time_t time) { #if FMT_USE_LOCAL_TIME template inline auto localtime(std::chrono::local_time time) -> std::tm { - return localtime(std::chrono::system_clock::to_time_t( - std::chrono::current_zone()->to_sys(time))); + return localtime( + detail::to_time_t(std::chrono::current_zone()->to_sys(time))); } #endif @@ -490,29 +535,29 @@ inline auto localtime(std::chrono::local_time time) -> std::tm { expressed in Coordinated Universal Time (UTC). Unlike ``std::gmtime``, this function is thread-safe on most platforms. 
*/ -inline std::tm gmtime(std::time_t time) { +inline auto gmtime(std::time_t time) -> std::tm { struct dispatcher { std::time_t time_; std::tm tm_; dispatcher(std::time_t t) : time_(t) {} - bool run() { + auto run() -> bool { using namespace fmt::detail; return handle(gmtime_r(&time_, &tm_)); } - bool handle(std::tm* tm) { return tm != nullptr; } + auto handle(std::tm* tm) -> bool { return tm != nullptr; } - bool handle(detail::null<>) { + auto handle(detail::null<>) -> bool { using namespace fmt::detail; return fallback(gmtime_s(&tm_, &time_)); } - bool fallback(int res) { return res == 0; } + auto fallback(int res) -> bool { return res == 0; } #if !FMT_MSC_VERSION - bool fallback(detail::null<>) { + auto fallback(detail::null<>) -> bool { std::tm* tm = std::gmtime(&time_); if (tm) tm_ = *tm; return tm != nullptr; @@ -525,9 +570,11 @@ inline std::tm gmtime(std::time_t time) { return gt.tm_; } -inline std::tm gmtime( - std::chrono::time_point time_point) { - return gmtime(std::chrono::system_clock::to_time_t(time_point)); +template +inline auto gmtime( + std::chrono::time_point time_point) + -> std::tm { + return gmtime(detail::to_time_t(time_point)); } namespace detail { @@ -566,7 +613,8 @@ inline void write_digit2_separated(char* buf, unsigned a, unsigned b, } } -template FMT_CONSTEXPR inline const char* get_units() { +template +FMT_CONSTEXPR inline auto get_units() -> const char* { if (std::is_same::value) return "as"; if (std::is_same::value) return "fs"; if (std::is_same::value) return "ps"; @@ -584,8 +632,9 @@ template FMT_CONSTEXPR inline const char* get_units() { if (std::is_same::value) return "Ts"; if (std::is_same::value) return "Ps"; if (std::is_same::value) return "Es"; - if (std::is_same>::value) return "m"; + if (std::is_same>::value) return "min"; if (std::is_same>::value) return "h"; + if (std::is_same>::value) return "d"; return nullptr; } @@ -621,9 +670,8 @@ auto write_padding(OutputIt out, pad_type pad) -> OutputIt { // Parses a put_time-like 
format string and invokes handler actions. template -FMT_CONSTEXPR const Char* parse_chrono_format(const Char* begin, - const Char* end, - Handler&& handler) { +FMT_CONSTEXPR auto parse_chrono_format(const Char* begin, const Char* end, + Handler&& handler) -> const Char* { if (begin == end || *begin == '}') return begin; if (*begin != '%') FMT_THROW(format_error("invalid format")); auto ptr = begin; @@ -954,25 +1002,25 @@ struct tm_format_checker : null_chrono_spec_handler { FMT_CONSTEXPR void on_tz_name() {} }; -inline const char* tm_wday_full_name(int wday) { +inline auto tm_wday_full_name(int wday) -> const char* { static constexpr const char* full_name_list[] = { "Sunday", "Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday"}; return wday >= 0 && wday <= 6 ? full_name_list[wday] : "?"; } -inline const char* tm_wday_short_name(int wday) { +inline auto tm_wday_short_name(int wday) -> const char* { static constexpr const char* short_name_list[] = {"Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat"}; return wday >= 0 && wday <= 6 ? short_name_list[wday] : "???"; } -inline const char* tm_mon_full_name(int mon) { +inline auto tm_mon_full_name(int mon) -> const char* { static constexpr const char* full_name_list[] = { "January", "February", "March", "April", "May", "June", "July", "August", "September", "October", "November", "December"}; return mon >= 0 && mon <= 11 ? full_name_list[mon] : "?"; } -inline const char* tm_mon_short_name(int mon) { +inline auto tm_mon_short_name(int mon) -> const char* { static constexpr const char* short_name_list[] = { "Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec", @@ -1004,21 +1052,21 @@ inline void tzset_once() { // Converts value to Int and checks that it's in the range [0, upper). 
template ::value)> -inline Int to_nonnegative_int(T value, Int upper) { - FMT_ASSERT(std::is_unsigned::value || - (value >= 0 && to_unsigned(value) <= to_unsigned(upper)), - "invalid value"); - (void)upper; +inline auto to_nonnegative_int(T value, Int upper) -> Int { + if (!std::is_unsigned::value && + (value < 0 || to_unsigned(value) > to_unsigned(upper))) { + FMT_THROW(fmt::format_error("chrono value is out of range")); + } return static_cast(value); } template ::value)> -inline Int to_nonnegative_int(T value, Int upper) { +inline auto to_nonnegative_int(T value, Int upper) -> Int { if (value < 0 || value > static_cast(upper)) FMT_THROW(format_error("invalid value")); return static_cast(value); } -constexpr long long pow10(std::uint32_t n) { +constexpr auto pow10(std::uint32_t n) -> long long { return n == 0 ? 1 : 10 * pow10(n - 1); } @@ -1052,13 +1100,12 @@ void write_fractional_seconds(OutputIt& out, Duration d, int precision = -1) { std::chrono::seconds::rep>::type, std::ratio<1, detail::pow10(num_fractional_digits)>>; - const auto fractional = - d - std::chrono::duration_cast(d); + const auto fractional = d - fmt_duration_cast(d); const auto subseconds = std::chrono::treat_as_floating_point< typename subsecond_precision::rep>::value ? 
fractional.count() - : std::chrono::duration_cast(fractional).count(); + : fmt_duration_cast(fractional).count(); auto n = static_cast>(subseconds); const int num_digits = detail::count_digits(n); @@ -1109,11 +1156,11 @@ void write_floating_seconds(memory_buffer& buf, Duration duration, num_fractional_digits = 6; } - format_to(std::back_inserter(buf), FMT_STRING("{:.{}f}"), - std::fmod(val * static_cast(Duration::period::num) / - static_cast(Duration::period::den), - static_cast(60)), - num_fractional_digits); + fmt::format_to(std::back_inserter(buf), FMT_STRING("{:.{}f}"), + std::fmod(val * static_cast(Duration::period::num) / + static_cast(Duration::period::den), + static_cast(60)), + num_fractional_digits); } template (l); } - // Algorithm: - // https://en.wikipedia.org/wiki/ISO_week_date#Calculating_the_week_number_from_a_month_and_day_of_the_month_or_ordinal_date + // Algorithm: https://en.wikipedia.org/wiki/ISO_week_date. auto iso_year_weeks(long long curr_year) const noexcept -> int { const auto prev_year = curr_year - 1; const auto curr_p = @@ -1315,7 +1361,7 @@ class tm_writer { subsecs_(subsecs), tm_(tm) {} - OutputIt out() const { return out_; } + auto out() const -> OutputIt { return out_; } FMT_CONSTEXPR void on_text(const Char* begin, const Char* end) { out_ = copy_str(begin, end, out_); @@ -1579,6 +1625,7 @@ struct chrono_format_checker : null_chrono_spec_handler { template FMT_CONSTEXPR void on_text(const Char*, const Char*) {} + FMT_CONSTEXPR void on_day_of_year() {} FMT_CONSTEXPR void on_24_hour(numeric_system, pad_type) {} FMT_CONSTEXPR void on_12_hour(numeric_system, pad_type) {} FMT_CONSTEXPR void on_minute(numeric_system, pad_type) {} @@ -1597,16 +1644,16 @@ struct chrono_format_checker : null_chrono_spec_handler { template ::value&& has_isfinite::value)> -inline bool isfinite(T) { +inline auto isfinite(T) -> bool { return true; } template ::value)> -inline T mod(T x, int y) { +inline auto mod(T x, int y) -> T { return x % static_cast(y); } 
template ::value)> -inline T mod(T x, int y) { +inline auto mod(T x, int y) -> T { return std::fmod(x, static_cast(y)); } @@ -1621,49 +1668,38 @@ template struct make_unsigned_or_unchanged { using type = typename std::make_unsigned::type; }; -#if FMT_SAFE_DURATION_CAST -// throwing version of safe_duration_cast -template -To fmt_safe_duration_cast(std::chrono::duration from) { - int ec; - To to = safe_duration_cast::safe_duration_cast(from, ec); - if (ec) FMT_THROW(format_error("cannot format duration")); - return to; -} -#endif - template ::value)> -inline std::chrono::duration get_milliseconds( - std::chrono::duration d) { +inline auto get_milliseconds(std::chrono::duration d) + -> std::chrono::duration { // this may overflow and/or the result may not fit in the // target type. #if FMT_SAFE_DURATION_CAST using CommonSecondsType = typename std::common_type::type; - const auto d_as_common = fmt_safe_duration_cast(d); + const auto d_as_common = fmt_duration_cast(d); const auto d_as_whole_seconds = - fmt_safe_duration_cast(d_as_common); + fmt_duration_cast(d_as_common); // this conversion should be nonproblematic const auto diff = d_as_common - d_as_whole_seconds; const auto ms = - fmt_safe_duration_cast>(diff); + fmt_duration_cast>(diff); return ms; #else - auto s = std::chrono::duration_cast(d); - return std::chrono::duration_cast(d - s); + auto s = fmt_duration_cast(d); + return fmt_duration_cast(d - s); #endif } template ::value)> -OutputIt format_duration_value(OutputIt out, Rep val, int) { +auto format_duration_value(OutputIt out, Rep val, int) -> OutputIt { return write(out, val); } template ::value)> -OutputIt format_duration_value(OutputIt out, Rep val, int precision) { +auto format_duration_value(OutputIt out, Rep val, int precision) -> OutputIt { auto specs = format_specs(); specs.precision = precision; specs.type = precision >= 0 ? 
presentation_type::fixed_lower @@ -1672,12 +1708,12 @@ OutputIt format_duration_value(OutputIt out, Rep val, int precision) { } template -OutputIt copy_unit(string_view unit, OutputIt out, Char) { +auto copy_unit(string_view unit, OutputIt out, Char) -> OutputIt { return std::copy(unit.begin(), unit.end(), out); } template -OutputIt copy_unit(string_view unit, OutputIt out, wchar_t) { +auto copy_unit(string_view unit, OutputIt out, wchar_t) -> OutputIt { // This works when wchar_t is UTF-32 because units only contain characters // that have the same representation in UTF-16 and UTF-32. utf8_to_utf16 u(unit); @@ -1685,7 +1721,7 @@ OutputIt copy_unit(string_view unit, OutputIt out, wchar_t) { } template -OutputIt format_duration_unit(OutputIt out) { +auto format_duration_unit(OutputIt out) -> OutputIt { if (const char* unit = get_units()) return copy_unit(string_view(unit), out, Char()); *out++ = '['; @@ -1752,18 +1788,12 @@ struct chrono_formatter { // this may overflow and/or the result may not fit in the // target type. -#if FMT_SAFE_DURATION_CAST // might need checked conversion (rep!=Rep) - auto tmpval = std::chrono::duration(val); - s = fmt_safe_duration_cast(tmpval); -#else - s = std::chrono::duration_cast( - std::chrono::duration(val)); -#endif + s = fmt_duration_cast(std::chrono::duration(val)); } // returns true if nan or inf, writes to out. - bool handle_nan_inf() { + auto handle_nan_inf() -> bool { if (isfinite(val)) { return false; } @@ -1780,17 +1810,22 @@ struct chrono_formatter { return true; } - Rep hour() const { return static_cast(mod((s.count() / 3600), 24)); } + auto days() const -> Rep { return static_cast(s.count() / 86400); } + auto hour() const -> Rep { + return static_cast(mod((s.count() / 3600), 24)); + } - Rep hour12() const { + auto hour12() const -> Rep { Rep hour = static_cast(mod((s.count() / 3600), 12)); return hour <= 0 ? 
12 : hour; } - Rep minute() const { return static_cast(mod((s.count() / 60), 60)); } - Rep second() const { return static_cast(mod(s.count(), 60)); } + auto minute() const -> Rep { + return static_cast(mod((s.count() / 60), 60)); + } + auto second() const -> Rep { return static_cast(mod(s.count(), 60)); } - std::tm time() const { + auto time() const -> std::tm { auto time = std::tm(); time.tm_hour = to_nonnegative_int(hour(), 24); time.tm_min = to_nonnegative_int(minute(), 60); @@ -1858,10 +1893,14 @@ struct chrono_formatter { void on_dec0_week_of_year(numeric_system) {} void on_dec1_week_of_year(numeric_system) {} void on_iso_week_of_year(numeric_system) {} - void on_day_of_year() {} void on_day_of_month(numeric_system) {} void on_day_of_month_space(numeric_system) {} + void on_day_of_year() { + if (handle_nan_inf()) return; + write(days(), 0); + } + void on_24_hour(numeric_system ns, pad_type pad) { if (handle_nan_inf()) return; @@ -1968,7 +2007,7 @@ class weekday { weekday() = default; explicit constexpr weekday(unsigned wd) noexcept : value(static_cast(wd != 7 ? 
wd : 0)) {} - constexpr unsigned c_encoding() const noexcept { return value; } + constexpr auto c_encoding() const noexcept -> unsigned { return value; } }; class year_month_day {}; @@ -2083,25 +2122,22 @@ struct formatter, period::num != 1 || period::den != 1 || std::is_floating_point::value)) { const auto epoch = val.time_since_epoch(); - auto subsecs = std::chrono::duration_cast( - epoch - std::chrono::duration_cast(epoch)); + auto subsecs = detail::fmt_duration_cast( + epoch - detail::fmt_duration_cast(epoch)); if (subsecs.count() < 0) { auto second = - std::chrono::duration_cast(std::chrono::seconds(1)); + detail::fmt_duration_cast(std::chrono::seconds(1)); if (epoch.count() < ((Duration::min)() + second).count()) FMT_THROW(format_error("duration is too small")); subsecs += second; val -= second; } - return formatter::do_format( - gmtime(std::chrono::time_point_cast(val)), ctx, - &subsecs); + return formatter::do_format(gmtime(val), ctx, &subsecs); } - return formatter::format( - gmtime(std::chrono::time_point_cast(val)), ctx); + return formatter::format(gmtime(val), ctx); } }; @@ -2120,17 +2156,13 @@ struct formatter, Char> if (period::num != 1 || period::den != 1 || std::is_floating_point::value) { const auto epoch = val.time_since_epoch(); - const auto subsecs = std::chrono::duration_cast( - epoch - std::chrono::duration_cast(epoch)); + const auto subsecs = detail::fmt_duration_cast( + epoch - detail::fmt_duration_cast(epoch)); - return formatter::do_format( - localtime(std::chrono::time_point_cast(val)), - ctx, &subsecs); + return formatter::do_format(localtime(val), ctx, &subsecs); } - return formatter::format( - localtime(std::chrono::time_point_cast(val)), - ctx); + return formatter::format(localtime(val), ctx); } }; #endif diff --git a/src/fmt/color.h b/src/fmt/color.h index 8697e1ca0b..464519e582 100644 --- a/src/fmt/color.h +++ b/src/fmt/color.h @@ -233,7 +233,7 @@ class text_style { FMT_CONSTEXPR text_style(emphasis em = emphasis()) noexcept : 
set_foreground_color(), set_background_color(), ems(em) {} - FMT_CONSTEXPR text_style& operator|=(const text_style& rhs) { + FMT_CONSTEXPR auto operator|=(const text_style& rhs) -> text_style& { if (!set_foreground_color) { set_foreground_color = rhs.set_foreground_color; foreground_color = rhs.foreground_color; @@ -257,29 +257,29 @@ class text_style { return *this; } - friend FMT_CONSTEXPR text_style operator|(text_style lhs, - const text_style& rhs) { + friend FMT_CONSTEXPR auto operator|(text_style lhs, const text_style& rhs) + -> text_style { return lhs |= rhs; } - FMT_CONSTEXPR bool has_foreground() const noexcept { + FMT_CONSTEXPR auto has_foreground() const noexcept -> bool { return set_foreground_color; } - FMT_CONSTEXPR bool has_background() const noexcept { + FMT_CONSTEXPR auto has_background() const noexcept -> bool { return set_background_color; } - FMT_CONSTEXPR bool has_emphasis() const noexcept { + FMT_CONSTEXPR auto has_emphasis() const noexcept -> bool { return static_cast(ems) != 0; } - FMT_CONSTEXPR detail::color_type get_foreground() const noexcept { + FMT_CONSTEXPR auto get_foreground() const noexcept -> detail::color_type { FMT_ASSERT(has_foreground(), "no foreground specified for this style"); return foreground_color; } - FMT_CONSTEXPR detail::color_type get_background() const noexcept { + FMT_CONSTEXPR auto get_background() const noexcept -> detail::color_type { FMT_ASSERT(has_background(), "no background specified for this style"); return background_color; } - FMT_CONSTEXPR emphasis get_emphasis() const noexcept { + FMT_CONSTEXPR auto get_emphasis() const noexcept -> emphasis { FMT_ASSERT(has_emphasis(), "no emphasis specified for this style"); return ems; } @@ -297,9 +297,11 @@ class text_style { } } - friend FMT_CONSTEXPR text_style fg(detail::color_type foreground) noexcept; + friend FMT_CONSTEXPR auto fg(detail::color_type foreground) noexcept + -> text_style; - friend FMT_CONSTEXPR text_style bg(detail::color_type background) noexcept; 
+ friend FMT_CONSTEXPR auto bg(detail::color_type background) noexcept + -> text_style; detail::color_type foreground_color; detail::color_type background_color; @@ -309,16 +311,19 @@ class text_style { }; /** Creates a text style from the foreground (text) color. */ -FMT_CONSTEXPR inline text_style fg(detail::color_type foreground) noexcept { +FMT_CONSTEXPR inline auto fg(detail::color_type foreground) noexcept + -> text_style { return text_style(true, foreground); } /** Creates a text style from the background color. */ -FMT_CONSTEXPR inline text_style bg(detail::color_type background) noexcept { +FMT_CONSTEXPR inline auto bg(detail::color_type background) noexcept + -> text_style { return text_style(false, background); } -FMT_CONSTEXPR inline text_style operator|(emphasis lhs, emphasis rhs) noexcept { +FMT_CONSTEXPR inline auto operator|(emphasis lhs, emphasis rhs) noexcept + -> text_style { return text_style(lhs) | rhs; } @@ -384,8 +389,8 @@ template struct ansi_color_escape { } FMT_CONSTEXPR operator const Char*() const noexcept { return buffer; } - FMT_CONSTEXPR const Char* begin() const noexcept { return buffer; } - FMT_CONSTEXPR_CHAR_TRAITS const Char* end() const noexcept { + FMT_CONSTEXPR auto begin() const noexcept -> const Char* { return buffer; } + FMT_CONSTEXPR20 auto end() const noexcept -> const Char* { return buffer + std::char_traits::length(buffer); } @@ -400,25 +405,27 @@ template struct ansi_color_escape { out[2] = static_cast('0' + c % 10); out[3] = static_cast(delimiter); } - static FMT_CONSTEXPR bool has_emphasis(emphasis em, emphasis mask) noexcept { + static FMT_CONSTEXPR auto has_emphasis(emphasis em, emphasis mask) noexcept + -> bool { return static_cast(em) & static_cast(mask); } }; template -FMT_CONSTEXPR ansi_color_escape make_foreground_color( - detail::color_type foreground) noexcept { +FMT_CONSTEXPR auto make_foreground_color(detail::color_type foreground) noexcept + -> ansi_color_escape { return ansi_color_escape(foreground, 
"\x1b[38;2;"); } template -FMT_CONSTEXPR ansi_color_escape make_background_color( - detail::color_type background) noexcept { +FMT_CONSTEXPR auto make_background_color(detail::color_type background) noexcept + -> ansi_color_escape { return ansi_color_escape(background, "\x1b[48;2;"); } template -FMT_CONSTEXPR ansi_color_escape make_emphasis(emphasis em) noexcept { +FMT_CONSTEXPR auto make_emphasis(emphasis em) noexcept + -> ansi_color_escape { return ansi_color_escape(em); } @@ -427,9 +434,10 @@ template inline void reset_color(buffer& buffer) { buffer.append(reset_color.begin(), reset_color.end()); } -template struct styled_arg { +template struct styled_arg : detail::view { const T& value; text_style style; + styled_arg(const T& v, text_style s) : value(v), style(s) {} }; template @@ -510,9 +518,10 @@ void print(const text_style& ts, const S& format_str, const Args&... args) { } template > -inline std::basic_string vformat( +inline auto vformat( const text_style& ts, const S& format_str, - basic_format_args>> args) { + basic_format_args>> args) + -> std::basic_string { basic_memory_buffer buf; detail::vformat_to(buf, ts, detail::to_string_view(format_str), args); return fmt::to_string(buf); @@ -531,8 +540,8 @@ inline std::basic_string vformat( \endrst */ template > -inline std::basic_string format(const text_style& ts, const S& format_str, - const Args&... args) { +inline auto format(const text_style& ts, const S& format_str, + const Args&... 
args) -> std::basic_string { return fmt::vformat(ts, detail::to_string_view(format_str), fmt::make_format_args>(args...)); } @@ -542,9 +551,10 @@ inline std::basic_string format(const text_style& ts, const S& format_str, */ template ::value)> -OutputIt vformat_to( - OutputIt out, const text_style& ts, basic_string_view format_str, - basic_format_args>> args) { +auto vformat_to(OutputIt out, const text_style& ts, + basic_string_view format_str, + basic_format_args>> args) + -> OutputIt { auto&& buf = detail::get_buffer(out); detail::vformat_to(buf, ts, format_str, args); return detail::get_iterator(buf, out); @@ -562,9 +572,10 @@ OutputIt vformat_to( fmt::emphasis::bold | fg(fmt::color::red), "{}", 42); \endrst */ -template >::value&& - detail::is_string::value> +template < + typename OutputIt, typename S, typename... Args, + bool enable = detail::is_output_iterator>::value && + detail::is_string::value> inline auto format_to(OutputIt out, const text_style& ts, const S& format_str, Args&&... args) -> typename std::enable_if::type { diff --git a/src/fmt/compile.h b/src/fmt/compile.h index af76507f07..71fa69c67e 100644 --- a/src/fmt/compile.h +++ b/src/fmt/compile.h @@ -14,8 +14,8 @@ FMT_BEGIN_NAMESPACE namespace detail { template -FMT_CONSTEXPR inline counting_iterator copy_str(InputIt begin, InputIt end, - counting_iterator it) { +FMT_CONSTEXPR inline auto copy_str(InputIt begin, InputIt end, + counting_iterator it) -> counting_iterator { return it + (end - begin); } @@ -57,7 +57,7 @@ struct udl_compiled_string : compiled_string { #endif template -const T& first(const T& value, const Tail&...) { +auto first(const T& value, const Tail&...) -> const T& { return value; } @@ -489,18 +489,19 @@ FMT_CONSTEXPR OutputIt format_to(OutputIt out, const S&, Args&&... args) { template ::value)> -format_to_n_result format_to_n(OutputIt out, size_t n, - const S& format_str, Args&&... args) { +auto format_to_n(OutputIt out, size_t n, const S& format_str, Args&&... 
args) + -> format_to_n_result { using traits = detail::fixed_buffer_traits; auto buf = detail::iterator_buffer(out, n); - format_to(std::back_inserter(buf), format_str, std::forward(args)...); + fmt::format_to(std::back_inserter(buf), format_str, + std::forward(args)...); return {buf.out(), buf.count()}; } template ::value)> -FMT_CONSTEXPR20 size_t formatted_size(const S& format_str, - const Args&... args) { +FMT_CONSTEXPR20 auto formatted_size(const S& format_str, const Args&... args) + -> size_t { return fmt::format_to(detail::counting_iterator(), format_str, args...) .count(); } diff --git a/src/fmt/core.h b/src/fmt/core.h index 9f7de781bb..6a53b8c52c 100644 --- a/src/fmt/core.h +++ b/src/fmt/core.h @@ -8,17 +8,15 @@ #ifndef FMT_CORE_H_ #define FMT_CORE_H_ -#include // std::byte -#include // std::FILE -#include // std::strlen -#include -#include -#include // std::addressof -#include -#include +#include // std::byte +#include // std::FILE +#include // std::strlen +#include // CHAR_BIT +#include // std::string +#include // std::enable_if // The fmt library version in the form major * 10000 + minor * 100 + patch. -#define FMT_VERSION 100100 +#define FMT_VERSION 100200 #if defined(__clang__) && !defined(__ibmxl__) # define FMT_CLANG_VERSION (__clang_major__ * 100 + __clang_minor__) @@ -58,6 +56,12 @@ # define FMT_MSC_WARNING(...) 
#endif +#ifdef _GLIBCXX_RELEASE +# define FMT_GLIBCXX_RELEASE _GLIBCXX_RELEASE +#else +# define FMT_GLIBCXX_RELEASE 0 +#endif + #ifdef _MSVC_LANG # define FMT_CPLUSPLUS _MSVC_LANG #else @@ -88,6 +92,20 @@ #define FMT_HAS_CPP17_ATTRIBUTE(attribute) \ (FMT_CPLUSPLUS >= 201703L && FMT_HAS_CPP_ATTRIBUTE(attribute)) +#ifndef FMT_DEPRECATED +# if FMT_HAS_CPP14_ATTRIBUTE(deprecated) || FMT_MSC_VERSION >= 1900 +# define FMT_DEPRECATED [[deprecated]] +# else +# if (defined(__GNUC__) && !defined(__LCC__)) || defined(__clang__) +# define FMT_DEPRECATED __attribute__((deprecated)) +# elif FMT_MSC_VERSION +# define FMT_DEPRECATED __declspec(deprecated) +# else +# define FMT_DEPRECATED /* deprecated */ +# endif +# endif +#endif + // Check if relaxed C++14 constexpr is supported. // GCC doesn't allow throw in constexpr until version 6 (bug 67371). #ifndef FMT_USE_CONSTEXPR @@ -105,30 +123,17 @@ # define FMT_CONSTEXPR #endif -#if ((FMT_CPLUSPLUS >= 202002L) && \ - (!defined(_GLIBCXX_RELEASE) || _GLIBCXX_RELEASE > 9)) || \ - (FMT_CPLUSPLUS >= 201709L && FMT_GCC_VERSION >= 1002) +#if (FMT_CPLUSPLUS >= 202002L || \ + (FMT_CPLUSPLUS >= 201709L && FMT_GCC_VERSION >= 1002)) && \ + ((!FMT_GLIBCXX_RELEASE || FMT_GLIBCXX_RELEASE >= 10) && \ + (!defined(_LIBCPP_VERSION) || _LIBCPP_VERSION >= 10000) && \ + (!FMT_MSC_VERSION || FMT_MSC_VERSION >= 1928)) && \ + defined(__cpp_lib_is_constant_evaluated) # define FMT_CONSTEXPR20 constexpr #else # define FMT_CONSTEXPR20 #endif -// Check if constexpr std::char_traits<>::{compare,length} are supported. -#if defined(__GLIBCXX__) -# if FMT_CPLUSPLUS >= 201703L && defined(_GLIBCXX_RELEASE) && \ - _GLIBCXX_RELEASE >= 7 // GCC 7+ libstdc++ has _GLIBCXX_RELEASE. 
-# define FMT_CONSTEXPR_CHAR_TRAITS constexpr -# endif -#elif defined(_LIBCPP_VERSION) && FMT_CPLUSPLUS >= 201703L && \ - _LIBCPP_VERSION >= 4000 -# define FMT_CONSTEXPR_CHAR_TRAITS constexpr -#elif FMT_MSC_VERSION >= 1914 && FMT_CPLUSPLUS >= 201703L -# define FMT_CONSTEXPR_CHAR_TRAITS constexpr -#endif -#ifndef FMT_CONSTEXPR_CHAR_TRAITS -# define FMT_CONSTEXPR_CHAR_TRAITS -#endif - // Check if exceptions are disabled. #ifndef FMT_EXCEPTIONS # if (defined(__GNUC__) && !defined(__EXCEPTIONS)) || \ @@ -191,33 +196,25 @@ # define FMT_END_EXPORT #endif +#if FMT_GCC_VERSION || FMT_CLANG_VERSION +# define FMT_VISIBILITY(value) __attribute__((visibility(value))) +#else +# define FMT_VISIBILITY(value) +#endif + #if !defined(FMT_HEADER_ONLY) && defined(_WIN32) -# ifdef FMT_LIB_EXPORT +# if defined(FMT_LIB_EXPORT) # define FMT_API __declspec(dllexport) # elif defined(FMT_SHARED) # define FMT_API __declspec(dllimport) # endif -#else -# if defined(FMT_LIB_EXPORT) || defined(FMT_SHARED) -# if defined(__GNUC__) || defined(__clang__) -# define FMT_API __attribute__((visibility("default"))) -# endif -# endif +#elif defined(FMT_LIB_EXPORT) || defined(FMT_SHARED) +# define FMT_API FMT_VISIBILITY("default") #endif #ifndef FMT_API # define FMT_API #endif -// libc++ supports string_view in pre-c++17. -#if FMT_HAS_INCLUDE() && \ - (FMT_CPLUSPLUS >= 201703L || defined(_LIBCPP_VERSION)) -# include -# define FMT_USE_STRING_VIEW -#elif FMT_HAS_INCLUDE("experimental/string_view") && FMT_CPLUSPLUS >= 201402L -# include -# define FMT_USE_EXPERIMENTAL_STRING_VIEW -#endif - #ifndef FMT_UNICODE # define FMT_UNICODE !FMT_MSC_VERSION #endif @@ -228,8 +225,9 @@ __apple_build_version__ >= 14000029L) && \ FMT_CPLUSPLUS >= 202002L) || \ (defined(__cpp_consteval) && \ - (!FMT_MSC_VERSION || _MSC_FULL_VER >= 193030704)) -// consteval is broken in MSVC before VS2022 and Apple clang before 14. 
+ (!FMT_MSC_VERSION || FMT_MSC_VERSION >= 1929)) +// consteval is broken in MSVC before VS2019 version 16.10 and Apple clang +// before 14. # define FMT_CONSTEVAL consteval # define FMT_HAS_CONSTEVAL # else @@ -248,6 +246,15 @@ # endif #endif +// GCC < 5 requires this-> in decltype. +#ifndef FMT_DECLTYPE_THIS +# if FMT_GCC_VERSION && FMT_GCC_VERSION < 500 +# define FMT_DECLTYPE_THIS this-> +# else +# define FMT_DECLTYPE_THIS +# endif +#endif + // Enable minimal optimizations for more compact code in debug mode. FMT_GCC_PRAGMA("GCC push_options") #if !defined(__OPTIMIZE__) && !defined(__NVCOMPILER) && !defined(__LCC__) && \ @@ -269,20 +276,57 @@ template using remove_const_t = typename std::remove_const::type; template using remove_cvref_t = typename std::remove_cv>::type; -template struct type_identity { using type = T; }; +template struct type_identity { + using type = T; +}; template using type_identity_t = typename type_identity::type; template using underlying_t = typename std::underlying_type::type; -// Checks whether T is a container with contiguous storage. -template struct is_contiguous : std::false_type {}; -template -struct is_contiguous> : std::true_type {}; +#if FMT_GCC_VERSION && FMT_GCC_VERSION < 500 +// A workaround for gcc 4.8 to make void_t work in a SFINAE context. +template struct void_t_impl { + using type = void; +}; +template using void_t = typename void_t_impl::type; +#else +template using void_t = void; +#endif struct monostate { constexpr monostate() {} }; +// An implementation of back_insert_iterator to avoid dependency on . 
+template class back_insert_iterator { + private: + Container* container_; + + friend auto get_container(back_insert_iterator it) -> Container& { + return *it.container_; + } + + public: + using difference_type = ptrdiff_t; + FMT_UNCHECKED_ITERATOR(back_insert_iterator); + + explicit back_insert_iterator(Container& c) : container_(&c) {} + + auto operator=(const typename Container::value_type& value) + -> back_insert_iterator& { + container_->push_back(value); + return *this; + } + auto operator*() -> back_insert_iterator& { return *this; } + auto operator++() -> back_insert_iterator& { return *this; } + auto operator++(int) -> back_insert_iterator { return *this; } +}; + +template +auto back_inserter(Container& c) -> back_insert_iterator { + return {c}; +} + // An enable_if helper to be used in template parameters which results in much // shorter symbols: https://godbolt.org/z/sWw4vP. Extra parentheses are needed // to workaround a bug in MSVC 2019 (see #1140 and #1186). @@ -310,10 +354,9 @@ template FMT_CONSTEXPR void ignore_unused(const T&...) {} constexpr FMT_INLINE auto is_constant_evaluated( bool default_value = false) noexcept -> bool { // Workaround for incompatibility between libstdc++ consteval-based -// std::is_constant_evaluated() implementation and clang-14. -// https://github.com/fmtlib/fmt/issues/3247 -#if FMT_CPLUSPLUS >= 202002L && defined(_GLIBCXX_RELEASE) && \ - _GLIBCXX_RELEASE >= 12 && \ +// std::is_constant_evaluated() implementation and clang-14: +// https://github.com/fmtlib/fmt/issues/3247. 
+#if FMT_CPLUSPLUS >= 202002L && FMT_GLIBCXX_RELEASE >= 12 && \ (FMT_CLANG_VERSION >= 1400 && FMT_CLANG_VERSION < 1500) ignore_unused(default_value); return __builtin_is_constant_evaluated(); @@ -346,15 +389,6 @@ FMT_NORETURN FMT_API void assert_fail(const char* file, int line, # endif #endif -#if defined(FMT_USE_STRING_VIEW) -template using std_string_view = std::basic_string_view; -#elif defined(FMT_USE_EXPERIMENTAL_STRING_VIEW) -template -using std_string_view = std::experimental::basic_string_view; -#else -template struct std_string_view {}; -#endif - #ifdef FMT_USE_INT128 // Do nothing. #elif defined(__SIZEOF_INT128__) && !defined(__NVCC__) && \ @@ -386,6 +420,15 @@ FMT_CONSTEXPR auto to_unsigned(Int value) -> return static_cast::type>(value); } +template +struct is_string_like : std::false_type {}; + +// A heuristic to detect std::string and std::string_view. +template +struct is_string_like().find_first_of( + typename T::value_type(), 0))>> : std::true_type { +}; + FMT_CONSTEXPR inline auto is_utf8() -> bool { FMT_MSC_WARNING(suppress : 4566) constexpr unsigned char section[] = "\u00A7"; @@ -394,8 +437,33 @@ FMT_CONSTEXPR inline auto is_utf8() -> bool { return FMT_UNICODE || (sizeof(section) == 3 && uchar(section[0]) == 0xC2 && uchar(section[1]) == 0xA7); } + +template FMT_CONSTEXPR auto length(const Char* s) -> size_t { + size_t len = 0; + while (*s++) ++len; + return len; +} + +template +FMT_CONSTEXPR auto compare(const Char* s1, const Char* s2, std::size_t n) + -> int { + for (; n != 0; ++s1, ++s2, --n) { + if (*s1 < *s2) return -1; + if (*s1 > *s2) return 1; + } + return 0; +} } // namespace detail +template +using basic_string = + std::basic_string, std::allocator>; + +// Checks whether T is a container with contiguous storage. +template struct is_contiguous : std::false_type {}; +template +struct is_contiguous> : std::true_type {}; + /** An implementation of ``std::basic_string_view`` for pre-C++17. It provides a subset of the API. 
``fmt::basic_string_view`` is used for format strings even @@ -420,29 +488,25 @@ template class basic_string_view { : data_(s), size_(count) {} /** - \rst - Constructs a string reference object from a C string computing - the size with ``std::char_traits::length``. - \endrst + Constructs a string reference object from a C string. */ - FMT_CONSTEXPR_CHAR_TRAITS + FMT_CONSTEXPR20 FMT_INLINE basic_string_view(const Char* s) : data_(s), size_(detail::const_check(std::is_same::value && - !detail::is_constant_evaluated(true)) + !detail::is_constant_evaluated(false)) ? std::strlen(reinterpret_cast(s)) - : std::char_traits::length(s)) {} + : detail::length(s)) {} - /** Constructs a string reference from a ``std::basic_string`` object. */ - template - FMT_CONSTEXPR basic_string_view( - const std::basic_string& s) noexcept - : data_(s.data()), size_(s.size()) {} - - template >::value)> - FMT_CONSTEXPR basic_string_view(S s) noexcept + /** + Constructs a string reference from a ``std::basic_string`` or a + ``std::basic_string_view`` object. + */ + template ::value&& std::is_same< + typename S::value_type, Char>::value)> + FMT_CONSTEXPR basic_string_view(const S& s) noexcept : data_(s.data()), size_(s.size()) {} /** Returns a pointer to the string data. 
*/ @@ -463,30 +527,28 @@ template class basic_string_view { size_ -= n; } - FMT_CONSTEXPR_CHAR_TRAITS bool starts_with( - basic_string_view sv) const noexcept { - return size_ >= sv.size_ && - std::char_traits::compare(data_, sv.data_, sv.size_) == 0; + FMT_CONSTEXPR auto starts_with(basic_string_view sv) const noexcept + -> bool { + return size_ >= sv.size_ && detail::compare(data_, sv.data_, sv.size_) == 0; } - FMT_CONSTEXPR_CHAR_TRAITS bool starts_with(Char c) const noexcept { - return size_ >= 1 && std::char_traits::eq(*data_, c); + FMT_CONSTEXPR auto starts_with(Char c) const noexcept -> bool { + return size_ >= 1 && *data_ == c; } - FMT_CONSTEXPR_CHAR_TRAITS bool starts_with(const Char* s) const { + FMT_CONSTEXPR auto starts_with(const Char* s) const -> bool { return starts_with(basic_string_view(s)); } // Lexicographically compare this string reference to other. - FMT_CONSTEXPR_CHAR_TRAITS auto compare(basic_string_view other) const -> int { + FMT_CONSTEXPR auto compare(basic_string_view other) const -> int { size_t str_size = size_ < other.size_ ? size_ : other.size_; - int result = std::char_traits::compare(data_, other.data_, str_size); + int result = detail::compare(data_, other.data_, str_size); if (result == 0) result = size_ == other.size_ ? 0 : (size_ < other.size_ ? 
-1 : 1); return result; } - FMT_CONSTEXPR_CHAR_TRAITS friend auto operator==(basic_string_view lhs, - basic_string_view rhs) - -> bool { + FMT_CONSTEXPR friend auto operator==(basic_string_view lhs, + basic_string_view rhs) -> bool { return lhs.compare(rhs) == 0; } friend auto operator!=(basic_string_view lhs, basic_string_view rhs) -> bool { @@ -526,21 +588,16 @@ template ::value)> FMT_INLINE auto to_string_view(const Char* s) -> basic_string_view { return s; } -template -inline auto to_string_view(const std::basic_string& s) - -> basic_string_view { - return s; +template ::value)> +inline auto to_string_view(const S& s) + -> basic_string_view { + return s; // std::basic_string[_view] } template constexpr auto to_string_view(basic_string_view s) -> basic_string_view { return s; } -template >::value)> -inline auto to_string_view(std_string_view s) -> basic_string_view { - return s; -} template ::value)> constexpr auto to_string_view(const S& s) -> basic_string_view { @@ -609,10 +666,10 @@ FMT_TYPE_CONSTANT(const Char*, cstring_type); FMT_TYPE_CONSTANT(basic_string_view, string_type); FMT_TYPE_CONSTANT(const void*, pointer_type); -constexpr bool is_integral_type(type t) { +constexpr auto is_integral_type(type t) -> bool { return t > type::none_type && t <= type::last_integer_type; } -constexpr bool is_arithmetic_type(type t) { +constexpr auto is_arithmetic_type(type t) -> bool { return t > type::none_type && t <= type::last_numeric_type; } @@ -635,21 +692,10 @@ enum { cstring_set = set(type::cstring_type), pointer_set = set(type::pointer_type) }; - -FMT_NORETURN FMT_API void throw_format_error(const char* message); - -struct error_handler { - constexpr error_handler() = default; - - // This function is intentionally not constexpr to give a compile-time error. - FMT_NORETURN void on_error(const char* message) { - throw_format_error(message); - } -}; } // namespace detail /** Throws ``format_error`` with a given message. 
*/ -using detail::throw_format_error; +FMT_NORETURN FMT_API void throw_format_error(const char* message); /** String's character type. */ template using char_t = typename detail::char_t_impl::type; @@ -701,7 +747,7 @@ template class basic_format_parse_context { */ FMT_CONSTEXPR auto next_arg_id() -> int { if (next_arg_id_ < 0) { - detail::throw_format_error( + throw_format_error( "cannot switch from manual to automatic argument indexing"); return 0; } @@ -716,7 +762,7 @@ template class basic_format_parse_context { */ FMT_CONSTEXPR void check_arg_id(int id) { if (next_arg_id_ > 0) { - detail::throw_format_error( + throw_format_error( "cannot switch from automatic to manual argument indexing"); return; } @@ -769,35 +815,6 @@ class compile_parse_context : public basic_format_parse_context { } }; -// Extracts a reference to the container from back_insert_iterator. -template -inline auto get_container(std::back_insert_iterator it) - -> Container& { - using base = std::back_insert_iterator; - struct accessor : base { - accessor(base b) : base(b) {} - using base::container; - }; - return *accessor(it).container; -} - -template -FMT_CONSTEXPR auto copy_str(InputIt begin, InputIt end, OutputIt out) - -> OutputIt { - while (begin != end) *out++ = static_cast(*begin++); - return out; -} - -template , U>::value&& is_char::value)> -FMT_CONSTEXPR auto copy_str(T* begin, T* end, U* out) -> U* { - if (is_constant_evaluated()) return copy_str(begin, end, out); - auto size = to_unsigned(end - begin); - if (size > 0) memcpy(out, begin, size * sizeof(U)); - return out + size; -} - /** \rst A contiguous memory buffer with an optional growing ability. It is an internal @@ -810,13 +827,18 @@ template class buffer { size_t size_; size_t capacity_; + using grow_fun = void (*)(buffer& buf, size_t capacity); + grow_fun grow_; + protected: // Don't initialize ptr_ since it is not accessed to save a few cycles. 
FMT_MSC_WARNING(suppress : 26495) - buffer(size_t sz) noexcept : size_(sz), capacity_(sz) {} + FMT_CONSTEXPR buffer(grow_fun grow, size_t sz) noexcept + : size_(sz), capacity_(sz), grow_(grow) {} - FMT_CONSTEXPR20 buffer(T* p = nullptr, size_t sz = 0, size_t cap = 0) noexcept - : ptr_(p), size_(sz), capacity_(cap) {} + FMT_CONSTEXPR20 buffer(grow_fun grow, T* p = nullptr, size_t sz = 0, + size_t cap = 0) noexcept + : ptr_(p), size_(sz), capacity_(cap), grow_(grow) {} FMT_CONSTEXPR20 ~buffer() = default; buffer(buffer&&) = default; @@ -827,9 +849,6 @@ template class buffer { capacity_ = buf_capacity; } - /** Increases the buffer capacity to hold at least *capacity* elements. */ - virtual FMT_CONSTEXPR20 void grow(size_t capacity) = 0; - public: using value_type = T; using const_reference = const T&; @@ -868,7 +887,7 @@ template class buffer { // for at least one additional element either by increasing the capacity or by // flushing the buffer if it is full. FMT_CONSTEXPR20 void try_reserve(size_t new_capacity) { - if (new_capacity > capacity_) grow(new_capacity); + if (new_capacity > capacity_) grow_(*this, new_capacity); } FMT_CONSTEXPR20 void push_back(const T& value) { @@ -917,22 +936,25 @@ class iterator_buffer final : public Traits, public buffer { enum { buffer_size = 256 }; T data_[buffer_size]; - protected: - FMT_CONSTEXPR20 void grow(size_t) override { - if (this->size() == buffer_size) flush(); + static FMT_CONSTEXPR20 void grow(buffer& buf, size_t) { + if (buf.size() == buffer_size) static_cast(buf).flush(); } void flush() { auto size = this->size(); this->clear(); - out_ = copy_str(data_, data_ + this->limit(size), out_); + const T* begin = data_; + const T* end = begin + this->limit(size); + while (begin != end) *out_++ = *begin++; } public: explicit iterator_buffer(OutputIt out, size_t n = buffer_size) - : Traits(n), buffer(data_, 0, buffer_size), out_(out) {} + : Traits(n), buffer(grow, data_, 0, buffer_size), out_(out) {} 
iterator_buffer(iterator_buffer&& other) - : Traits(other), buffer(data_, 0, buffer_size), out_(other.out_) {} + : Traits(other), + buffer(grow, data_, 0, buffer_size), + out_(other.out_) {} ~iterator_buffer() { flush(); } auto out() -> OutputIt { @@ -951,9 +973,9 @@ class iterator_buffer final enum { buffer_size = 256 }; T data_[buffer_size]; - protected: - FMT_CONSTEXPR20 void grow(size_t) override { - if (this->size() == this->capacity()) flush(); + static FMT_CONSTEXPR20 void grow(buffer& buf, size_t) { + if (buf.size() == buf.capacity()) + static_cast(buf).flush(); } void flush() { @@ -967,10 +989,10 @@ class iterator_buffer final public: explicit iterator_buffer(T* out, size_t n = buffer_size) - : fixed_buffer_traits(n), buffer(out, 0, n), out_(out) {} + : fixed_buffer_traits(n), buffer(grow, out, 0, n), out_(out) {} iterator_buffer(iterator_buffer&& other) : fixed_buffer_traits(other), - buffer(std::move(other)), + buffer(static_cast(other)), out_(other.out_) { if (this->data() != out_) { this->set(data_, buffer_size); @@ -989,38 +1011,37 @@ class iterator_buffer final }; template class iterator_buffer final : public buffer { - protected: - FMT_CONSTEXPR20 void grow(size_t) override {} - public: - explicit iterator_buffer(T* out, size_t = 0) : buffer(out, 0, ~size_t()) {} + explicit iterator_buffer(T* out, size_t = 0) + : buffer([](buffer&, size_t) {}, out, 0, ~size_t()) {} auto out() -> T* { return &*this->end(); } }; // A buffer that writes to a container with the contiguous storage. 
template -class iterator_buffer, +class iterator_buffer, enable_if_t::value, typename Container::value_type>> final : public buffer { private: + using value_type = typename Container::value_type; Container& container_; - protected: - FMT_CONSTEXPR20 void grow(size_t capacity) override { - container_.resize(capacity); - this->set(&container_[0], capacity); + static FMT_CONSTEXPR20 void grow(buffer& buf, size_t capacity) { + auto& self = static_cast(buf); + self.container_.resize(capacity); + self.set(&self.container_[0], capacity); } public: explicit iterator_buffer(Container& c) - : buffer(c.size()), container_(c) {} - explicit iterator_buffer(std::back_insert_iterator out, size_t = 0) + : buffer(grow, c.size()), container_(c) {} + explicit iterator_buffer(back_insert_iterator out, size_t = 0) : iterator_buffer(get_container(out)) {} - auto out() -> std::back_insert_iterator { - return std::back_inserter(container_); + auto out() -> back_insert_iterator { + return fmt::back_inserter(container_); } }; @@ -1031,15 +1052,14 @@ template class counting_buffer final : public buffer { T data_[buffer_size]; size_t count_ = 0; - protected: - FMT_CONSTEXPR20 void grow(size_t) override { - if (this->size() != buffer_size) return; - count_ += this->size(); - this->clear(); + static FMT_CONSTEXPR20 void grow(buffer& buf, size_t) { + if (buf.size() != buffer_size) return; + static_cast(buf).count_ += buf.size(); + buf.clear(); } public: - counting_buffer() : buffer(data_, 0, buffer_size) {} + counting_buffer() : buffer(grow, data_, 0, buffer_size) {} auto count() -> size_t { return count_ + this->size(); } }; @@ -1053,7 +1073,7 @@ FMT_CONSTEXPR void basic_format_parse_context::do_check_arg_id(int id) { (!FMT_GCC_VERSION || FMT_GCC_VERSION >= 1200)) { using context = detail::compile_parse_context; if (id >= static_cast(this)->num_args()) - detail::throw_format_error("argument not found"); + throw_format_error("argument not found"); } } @@ -1085,18 +1105,29 @@ template using 
has_formatter = std::is_constructible>; -// An output iterator that appends to a buffer. -// It is used to reduce symbol sizes for the common case. -class appender : public std::back_insert_iterator> { - using base = std::back_insert_iterator>; +// An output iterator that appends to a buffer. It is used instead of +// back_insert_iterator to reduce symbol sizes for the common case. +class appender { + private: + detail::buffer* buffer_; + + friend auto get_container(appender app) -> detail::buffer& { + return *app.buffer_; + } public: - using std::back_insert_iterator>::back_insert_iterator; - appender(base it) noexcept : base(it) {} + using difference_type = ptrdiff_t; FMT_UNCHECKED_ITERATOR(appender); - auto operator++() noexcept -> appender& { return *this; } - auto operator++(int) noexcept -> appender { return *this; } + appender(detail::buffer& buf) : buffer_(&buf) {} + + auto operator=(char c) -> appender& { + buffer_->push_back(c); + return *this; + } + auto operator*() -> appender& { return *this; } + auto operator++() -> appender& { return *this; } + auto operator++(int) -> appender { return *this; } }; namespace detail { @@ -1119,7 +1150,7 @@ constexpr auto has_const_formatter() -> bool { template using buffer_appender = conditional_t::value, appender, - std::back_insert_iterator>>; + back_insert_iterator>>; // Maps an output iterator to a buffer. template @@ -1128,7 +1159,7 @@ auto get_buffer(OutputIt out) -> iterator_buffer { } template , Buf>::value)> -auto get_buffer(std::back_insert_iterator out) -> buffer& { +auto get_buffer(back_insert_iterator out) -> buffer& { return get_container(out); } @@ -1293,7 +1324,13 @@ template class value { template FMT_CONSTEXPR20 FMT_INLINE value(T& val) { using value_type = remove_const_t; - custom.value = const_cast(std::addressof(val)); + // T may overload operator& e.g. std::vector::reference in libc++. 
+#ifdef __cpp_if_constexpr + if constexpr (std::is_same::value) + custom.value = const_cast(&val); +#endif + if (!is_constant_evaluated()) + custom.value = const_cast(&reinterpret_cast(val)); // Get the formatter type through the context to allow different contexts // have different extension points, e.g. `formatter` for `format` and // `printf_formatter` for `printf`. @@ -1314,6 +1351,7 @@ template class value { parse_ctx.advance_to(f.parse(parse_ctx)); using qualified_type = conditional_t(), const T, T>; + // Calling format through a mutable reference is deprecated. ctx.advance_to(f.format(*static_cast(arg), ctx)); } }; @@ -1327,7 +1365,7 @@ using ulong_type = conditional_t; template struct format_as_result { template ::value || std::is_class::value)> - static auto map(U*) -> decltype(format_as(std::declval())); + static auto map(U*) -> remove_cvref_t()))>; static auto map(...) -> void; using type = decltype(map(static_cast(nullptr))); @@ -1444,7 +1482,8 @@ template struct arg_mapper { // Only map owning types because mapping views can be unsafe. 
template , FMT_ENABLE_IF(std::is_arithmetic::value)> - FMT_CONSTEXPR FMT_INLINE auto map(const T& val) -> decltype(this->map(U())) { + FMT_CONSTEXPR FMT_INLINE auto map(const T& val) + -> decltype(FMT_DECLTYPE_THIS map(U())) { return map(format_as(val)); } @@ -1468,13 +1507,14 @@ template struct arg_mapper { !is_string::value && !is_char::value && !is_named_arg::value && !std::is_arithmetic>::value)> - FMT_CONSTEXPR FMT_INLINE auto map(T& val) -> decltype(this->do_map(val)) { + FMT_CONSTEXPR FMT_INLINE auto map(T& val) + -> decltype(FMT_DECLTYPE_THIS do_map(val)) { return do_map(val); } template ::value)> FMT_CONSTEXPR FMT_INLINE auto map(const T& named_arg) - -> decltype(this->map(named_arg.value)) { + -> decltype(FMT_DECLTYPE_THIS map(named_arg.value)) { return map(named_arg.value); } @@ -1493,45 +1533,19 @@ enum { max_packed_args = 62 / packed_arg_bits }; enum : unsigned long long { is_unpacked_bit = 1ULL << 63 }; enum : unsigned long long { has_named_args_bit = 1ULL << 62 }; -template -auto copy_str(InputIt begin, InputIt end, appender out) -> appender { - get_container(out).append(begin, end); - return out; -} -template -auto copy_str(InputIt begin, InputIt end, - std::back_insert_iterator out) - -> std::back_insert_iterator { - get_container(out).append(begin, end); - return out; -} - -template -FMT_CONSTEXPR auto copy_str(R&& rng, OutputIt out) -> OutputIt { - return detail::copy_str(rng.begin(), rng.end(), out); -} - -#if FMT_GCC_VERSION && FMT_GCC_VERSION < 500 -// A workaround for gcc 4.8 to make void_t work in a SFINAE context. 
-template struct void_t_impl { using type = void; }; -template using void_t = typename void_t_impl::type; -#else -template using void_t = void; -#endif - template struct is_output_iterator : std::false_type {}; +template <> struct is_output_iterator : std::true_type {}; + template struct is_output_iterator< - It, T, - void_t::iterator_category, - decltype(*std::declval() = std::declval())>> + It, T, void_t()++ = std::declval())>> : std::true_type {}; template struct is_back_insert_iterator : std::false_type {}; template -struct is_back_insert_iterator> +struct is_back_insert_iterator> : std::true_type {}; // A type-erased reference to an std::locale to avoid a heavy include. @@ -1607,8 +1621,8 @@ FMT_CONSTEXPR inline auto make_arg(T& val) -> basic_format_arg { } // namespace detail FMT_BEGIN_EXPORT -// A formatting argument. It is a trivially copyable/constructible type to -// allow storage in basic_memory_buffer. +// A formatting argument. Context is a template parameter for the compiled API +// where output can be unbuffered. template class basic_format_arg { private: detail::value value_; @@ -1618,11 +1632,6 @@ template class basic_format_arg { friend FMT_CONSTEXPR auto detail::make_arg(T& value) -> basic_format_arg; - template - friend FMT_CONSTEXPR auto visit_format_arg(Visitor&& vis, - const basic_format_arg& arg) - -> decltype(vis(0)); - friend class basic_format_args; friend class dynamic_format_arg_store; @@ -1660,55 +1669,68 @@ template class basic_format_arg { auto is_arithmetic() const -> bool { return detail::is_arithmetic_type(type_); } + + /** + \rst + Visits an argument dispatching to the appropriate visit method based on + the argument type. For example, if the argument type is ``double`` then + ``vis(value)`` will be called with the value of type ``double``. 
+ \endrst + */ + template + FMT_CONSTEXPR auto visit(Visitor&& vis) -> decltype(vis(0)) { + switch (type_) { + case detail::type::none_type: + break; + case detail::type::int_type: + return vis(value_.int_value); + case detail::type::uint_type: + return vis(value_.uint_value); + case detail::type::long_long_type: + return vis(value_.long_long_value); + case detail::type::ulong_long_type: + return vis(value_.ulong_long_value); + case detail::type::int128_type: + return vis(detail::convert_for_visit(value_.int128_value)); + case detail::type::uint128_type: + return vis(detail::convert_for_visit(value_.uint128_value)); + case detail::type::bool_type: + return vis(value_.bool_value); + case detail::type::char_type: + return vis(value_.char_value); + case detail::type::float_type: + return vis(value_.float_value); + case detail::type::double_type: + return vis(value_.double_value); + case detail::type::long_double_type: + return vis(value_.long_double_value); + case detail::type::cstring_type: + return vis(value_.string.data); + case detail::type::string_type: + using sv = basic_string_view; + return vis(sv(value_.string.data, value_.string.size)); + case detail::type::pointer_type: + return vis(value_.pointer); + case detail::type::custom_type: + return vis(typename basic_format_arg::handle(value_.custom)); + } + return vis(monostate()); + } + + FMT_INLINE auto format_custom(const char_type* parse_begin, + typename Context::parse_context_type& parse_ctx, + Context& ctx) -> bool { + if (type_ != detail::type::custom_type) return false; + parse_ctx.advance_to(parse_begin); + value_.custom.format(value_.custom.value, parse_ctx, ctx); + return true; + } }; -/** - \rst - Visits an argument dispatching to the appropriate visit method based on - the argument type. For example, if the argument type is ``double`` then - ``vis(value)`` will be called with the value of type ``double``. - \endrst - */ -// DEPRECATED! 
template -FMT_CONSTEXPR FMT_INLINE auto visit_format_arg( +FMT_DEPRECATED FMT_CONSTEXPR FMT_INLINE auto visit_format_arg( Visitor&& vis, const basic_format_arg& arg) -> decltype(vis(0)) { - switch (arg.type_) { - case detail::type::none_type: - break; - case detail::type::int_type: - return vis(arg.value_.int_value); - case detail::type::uint_type: - return vis(arg.value_.uint_value); - case detail::type::long_long_type: - return vis(arg.value_.long_long_value); - case detail::type::ulong_long_type: - return vis(arg.value_.ulong_long_value); - case detail::type::int128_type: - return vis(detail::convert_for_visit(arg.value_.int128_value)); - case detail::type::uint128_type: - return vis(detail::convert_for_visit(arg.value_.uint128_value)); - case detail::type::bool_type: - return vis(arg.value_.bool_value); - case detail::type::char_type: - return vis(arg.value_.char_value); - case detail::type::float_type: - return vis(arg.value_.float_value); - case detail::type::double_type: - return vis(arg.value_.double_value); - case detail::type::long_double_type: - return vis(arg.value_.long_double_value); - case detail::type::cstring_type: - return vis(arg.value_.string.data); - case detail::type::string_type: - using sv = basic_string_view; - return vis(sv(arg.value_.string.data, arg.value_.string.size)); - case detail::type::pointer_type: - return vis(arg.value_.pointer); - case detail::type::custom_type: - return vis(typename basic_format_arg::handle(arg.value_.custom)); - } - return vis(monostate()); + return arg.visit(std::forward(vis)); } // Formatting context. @@ -1748,8 +1770,8 @@ template class basic_format_context { } auto args() const -> const format_args& { return args_; } - FMT_CONSTEXPR auto error_handler() -> detail::error_handler { return {}; } - void on_error(const char* message) { error_handler().on_error(message); } + // This function is intentionally not constexpr to give a compile-time error. 
+ void on_error(const char* message) { throw_format_error(message); } // Returns an iterator to the beginning of the output range. FMT_CONSTEXPR auto out() -> iterator { return out_; } @@ -1831,7 +1853,7 @@ class format_arg_store // Arguments are taken by lvalue references to avoid some lifetime issues. template constexpr auto make_format_args(T&... args) - -> format_arg_store...> { + -> format_arg_store...> { return {args...}; } @@ -2107,11 +2129,8 @@ struct dynamic_format_specs : format_specs { }; // Converts a character to ASCII. Returns '\0' on conversion failure. -template ::value)> -constexpr auto to_ascii(Char c) -> char { - return c <= 0xff ? static_cast(c) : '\0'; -} -template ::value)> +template ::value || + std::is_enum::value)> constexpr auto to_ascii(Char c) -> char { return c <= 0xff ? static_cast(c) : '\0'; } @@ -2156,11 +2175,11 @@ FMT_CONSTEXPR auto parse_nonnegative_int(const Char*& begin, const Char* end, } while (p != end && '0' <= *p && *p <= '9'); auto num_digits = p - begin; begin = p; - if (num_digits <= std::numeric_limits::digits10) - return static_cast(value); + int digits10 = static_cast(sizeof(int) * CHAR_BIT * 3 / 10); + if (num_digits <= digits10) return static_cast(value); // Check for overflow. - const unsigned max = to_unsigned((std::numeric_limits::max)()); - return num_digits == std::numeric_limits::digits10 + 1 && + unsigned max = INT_MAX; + return num_digits == digits10 + 1 && prev * 10ull + unsigned(p[-1] - '0') <= max ? 
static_cast(value) : error_value; @@ -2188,9 +2207,8 @@ FMT_CONSTEXPR auto do_parse_arg_id(const Char* begin, const Char* end, Char c = *begin; if (c >= '0' && c <= '9') { int index = 0; - constexpr int max = (std::numeric_limits::max)(); if (c != '0') - index = parse_nonnegative_int(begin, end, max); + index = parse_nonnegative_int(begin, end, INT_MAX); else ++begin; if (begin == end || (*begin != '}' && *begin != ':')) @@ -2309,9 +2327,12 @@ FMT_CONSTEXPR FMT_INLINE auto parse_format_specs( dynamic_format_specs& specs; type arg_type; - FMT_CONSTEXPR auto operator()(pres type, int set) -> const Char* { - if (!in(arg_type, set)) throw_format_error("invalid format specifier"); - specs.type = type; + FMT_CONSTEXPR auto operator()(pres pres_type, int set) -> const Char* { + if (!in(arg_type, set)) { + if (arg_type == type::none_type) return begin; + throw_format_error("invalid format specifier"); + } + specs.type = pres_type; return begin + 1; } } parse_presentation_type{begin, specs, arg_type}; @@ -2328,6 +2349,7 @@ FMT_CONSTEXPR FMT_INLINE auto parse_format_specs( case '+': case '-': case ' ': + if (arg_type == type::none_type) return begin; enter_state(state::sign, in(arg_type, sint_set | float_set)); switch (c) { case '+': @@ -2343,14 +2365,17 @@ FMT_CONSTEXPR FMT_INLINE auto parse_format_specs( ++begin; break; case '#': + if (arg_type == type::none_type) return begin; enter_state(state::hash, is_arithmetic_type(arg_type)); specs.alt = true; ++begin; break; case '0': enter_state(state::zero); - if (!is_arithmetic_type(arg_type)) + if (!is_arithmetic_type(arg_type)) { + if (arg_type == type::none_type) return begin; throw_format_error("format specifier requires numeric argument"); + } if (specs.align == align::none) { // Ignore 0 if align is specified for compatibility with std::format. 
specs.align = align::numeric; @@ -2372,12 +2397,14 @@ FMT_CONSTEXPR FMT_INLINE auto parse_format_specs( begin = parse_dynamic_spec(begin, end, specs.width, specs.width_ref, ctx); break; case '.': + if (arg_type == type::none_type) return begin; enter_state(state::precision, in(arg_type, float_set | string_set | cstring_set)); begin = parse_precision(begin, end, specs.precision, specs.precision_ref, ctx); break; case 'L': + if (arg_type == type::none_type) return begin; enter_state(state::locale, is_arithmetic_type(arg_type)); specs.localized = true; ++begin; @@ -2411,6 +2438,8 @@ FMT_CONSTEXPR FMT_INLINE auto parse_format_specs( case 'G': return parse_presentation_type(pres::general_upper, float_set); case 'c': + if (arg_type == type::bool_type) + throw_format_error("invalid format specifier"); return parse_presentation_type(pres::chr, integral_set); case 's': return parse_presentation_type(pres::string, @@ -2550,9 +2579,9 @@ FMT_CONSTEXPR auto parse_format_specs(ParseContext& ctx) decltype(arg_mapper().map(std::declval())), typename strip_named_arg::type>; // LAMMPS customization. Fails to compile with (some) Intel compilers -#if defined(__cpp_if_constexpr) && 0 - if constexpr (std::is_default_constructible_v< - formatter>) { +#if defined(__cpp_if_constexpr) && 1 + if constexpr (std::is_default_constructible< + formatter>::value) { return formatter().parse(ctx); } else { type_is_unformattable_for _; @@ -2675,9 +2704,11 @@ void check_format_string(S format_str) { template struct vformat_args { using type = basic_format_args< - basic_format_context>, Char>>; + basic_format_context>, Char>>; +}; +template <> struct vformat_args { + using type = format_args; }; -template <> struct vformat_args { using type = format_args; }; // Use vformat_args and avoid type_identity to keep symbols short. 
template @@ -2779,7 +2810,7 @@ using format_string = basic_format_string...>; inline auto runtime(string_view s) -> runtime_format_string<> { return {{s}}; } #endif -FMT_API auto vformat(string_view fmt, format_args args) -> std::string; +FMT_API auto vformat(string_view fmt, format_args args) -> basic_string; /** \rst @@ -2794,7 +2825,7 @@ FMT_API auto vformat(string_view fmt, format_args args) -> std::string; */ template FMT_NODISCARD FMT_INLINE auto format(format_string fmt, T&&... args) - -> std::string { + -> basic_string { return vformat(fmt, fmt::make_format_args(args...)); } @@ -2816,7 +2847,7 @@ auto vformat_to(OutputIt out, string_view fmt, format_args args) -> OutputIt { **Example**:: auto out = std::vector(); - fmt::format_to(std::back_inserter(out), "{}", 42); + fmt::format_to(fmt::back_inserter(out), "{}", 42); \endrst */ template #endif -#ifdef _WIN32 +#if defined(_WIN32) && !defined(FMT_WINDOWS_NO_WCHAR) # include // _isatty #endif @@ -36,10 +36,6 @@ FMT_FUNC void assert_fail(const char* file, int line, const char* message) { std::terminate(); } -FMT_FUNC void throw_format_error(const char* message) { - FMT_THROW(format_error(message)); -} - FMT_FUNC void format_error_code(detail::buffer& out, int error_code, string_view message) noexcept { // Report error code making sure that the output fits into @@ -58,8 +54,8 @@ FMT_FUNC void format_error_code(detail::buffer& out, int error_code, error_code_size += detail::to_unsigned(detail::count_digits(abs_value)); auto it = buffer_appender(out); if (message.size() <= inline_buffer_size - error_code_size) - format_to(it, FMT_STRING("{}{}"), message, SEP); - format_to(it, FMT_STRING("{}{}"), ERROR_STR, error_code); + fmt::format_to(it, FMT_STRING("{}{}"), message, SEP); + fmt::format_to(it, FMT_STRING("{}{}"), ERROR_STR, error_code); FMT_ASSERT(out.size() <= inline_buffer_size, ""); } @@ -73,9 +69,8 @@ FMT_FUNC void report_error(format_func func, int error_code, } // A wrapper around fwrite that throws on 
error. -inline void fwrite_fully(const void* ptr, size_t size, size_t count, - FILE* stream) { - size_t written = std::fwrite(ptr, size, count, stream); +inline void fwrite_fully(const void* ptr, size_t count, FILE* stream) { + size_t written = std::fwrite(ptr, 1, count, stream); if (written < count) FMT_THROW(system_error(errno, FMT_STRING("cannot write to file"))); } @@ -86,7 +81,7 @@ locale_ref::locale_ref(const Locale& loc) : locale_(&loc) { static_assert(std::is_same::value, ""); } -template Locale locale_ref::get() const { +template auto locale_ref::get() const -> Locale { static_assert(std::is_same::value, ""); return locale_ ? *static_cast(locale_) : std::locale(); } @@ -98,7 +93,8 @@ FMT_FUNC auto thousands_sep_impl(locale_ref loc) -> thousands_sep_result { auto thousands_sep = grouping.empty() ? Char() : facet.thousands_sep(); return {std::move(grouping), thousands_sep}; } -template FMT_FUNC Char decimal_point_impl(locale_ref loc) { +template +FMT_FUNC auto decimal_point_impl(locale_ref loc) -> Char { return std::use_facet>(loc.get()) .decimal_point(); } @@ -127,6 +123,10 @@ FMT_FUNC auto write_loc(appender out, loc_value value, } } // namespace detail +FMT_FUNC void throw_format_error(const char* message) { + FMT_THROW(format_error(message)); +} + template typename Locale::id format_facet::id; #ifndef FMT_STATIC_THOUSANDS_SEPARATOR @@ -144,24 +144,25 @@ FMT_API FMT_FUNC auto format_facet::do_put( } #endif -FMT_FUNC std::system_error vsystem_error(int error_code, string_view fmt, - format_args args) { +FMT_FUNC auto vsystem_error(int error_code, string_view fmt, format_args args) + -> std::system_error { auto ec = std::error_code(error_code, std::generic_category()); return std::system_error(ec, vformat(fmt, args)); } namespace detail { -template inline bool operator==(basic_fp x, basic_fp y) { +template +inline auto operator==(basic_fp x, basic_fp y) -> bool { return x.f == y.f && x.e == y.e; } // Compilers should be able to optimize this into the ror 
instruction. -FMT_CONSTEXPR inline uint32_t rotr(uint32_t n, uint32_t r) noexcept { +FMT_CONSTEXPR inline auto rotr(uint32_t n, uint32_t r) noexcept -> uint32_t { r &= 31; return (n >> r) | (n << (32 - r)); } -FMT_CONSTEXPR inline uint64_t rotr(uint64_t n, uint32_t r) noexcept { +FMT_CONSTEXPR inline auto rotr(uint64_t n, uint32_t r) noexcept -> uint64_t { r &= 63; return (n >> r) | (n << (64 - r)); } @@ -170,14 +171,14 @@ FMT_CONSTEXPR inline uint64_t rotr(uint64_t n, uint32_t r) noexcept { namespace dragonbox { // Computes upper 64 bits of multiplication of a 32-bit unsigned integer and a // 64-bit unsigned integer. -inline uint64_t umul96_upper64(uint32_t x, uint64_t y) noexcept { +inline auto umul96_upper64(uint32_t x, uint64_t y) noexcept -> uint64_t { return umul128_upper64(static_cast(x) << 32, y); } // Computes lower 128 bits of multiplication of a 64-bit unsigned integer and a // 128-bit unsigned integer. -inline uint128_fallback umul192_lower128(uint64_t x, - uint128_fallback y) noexcept { +inline auto umul192_lower128(uint64_t x, uint128_fallback y) noexcept + -> uint128_fallback { uint64_t high = x * y.high(); uint128_fallback high_low = umul128(x, y.low()); return {high + high_low.high(), high_low.low()}; @@ -185,12 +186,12 @@ inline uint128_fallback umul192_lower128(uint64_t x, // Computes lower 64 bits of multiplication of a 32-bit unsigned integer and a // 64-bit unsigned integer. -inline uint64_t umul96_lower64(uint32_t x, uint64_t y) noexcept { +inline auto umul96_lower64(uint32_t x, uint64_t y) noexcept -> uint64_t { return x * y; } // Various fast log computations. -inline int floor_log10_pow2_minus_log10_4_over_3(int e) noexcept { +inline auto floor_log10_pow2_minus_log10_4_over_3(int e) noexcept -> int { FMT_ASSERT(e <= 2936 && e >= -2985, "too large exponent"); return (e * 631305 - 261663) >> 21; } @@ -204,7 +205,7 @@ FMT_INLINE_VARIABLE constexpr struct { // divisible by pow(10, N). // Precondition: n <= pow(10, N + 1). 
template -bool check_divisibility_and_divide_by_pow10(uint32_t& n) noexcept { +auto check_divisibility_and_divide_by_pow10(uint32_t& n) noexcept -> bool { // The numbers below are chosen such that: // 1. floor(n/d) = floor(nm / 2^k) where d=10 or d=100, // 2. nm mod 2^k < m if and only if n is divisible by d, @@ -229,7 +230,7 @@ bool check_divisibility_and_divide_by_pow10(uint32_t& n) noexcept { // Computes floor(n / pow(10, N)) for small n and N. // Precondition: n <= pow(10, N + 1). -template uint32_t small_division_by_pow10(uint32_t n) noexcept { +template auto small_division_by_pow10(uint32_t n) noexcept -> uint32_t { constexpr auto info = div_small_pow10_infos[N - 1]; FMT_ASSERT(n <= info.divisor * 10, "n is too large"); constexpr uint32_t magic_number = @@ -238,12 +239,12 @@ template uint32_t small_division_by_pow10(uint32_t n) noexcept { } // Computes floor(n / 10^(kappa + 1)) (float) -inline uint32_t divide_by_10_to_kappa_plus_1(uint32_t n) noexcept { +inline auto divide_by_10_to_kappa_plus_1(uint32_t n) noexcept -> uint32_t { // 1374389535 = ceil(2^37/100) return static_cast((static_cast(n) * 1374389535) >> 37); } // Computes floor(n / 10^(kappa + 1)) (double) -inline uint64_t divide_by_10_to_kappa_plus_1(uint64_t n) noexcept { +inline auto divide_by_10_to_kappa_plus_1(uint64_t n) noexcept -> uint64_t { // 2361183241434822607 = ceil(2^(64+7)/1000) return umul128_upper64(n, 2361183241434822607ull) >> 7; } @@ -255,7 +256,7 @@ template <> struct cache_accessor { using carrier_uint = float_info::carrier_uint; using cache_entry_type = uint64_t; - static uint64_t get_cached_power(int k) noexcept { + static auto get_cached_power(int k) noexcept -> uint64_t { FMT_ASSERT(k >= float_info::min_k && k <= float_info::max_k, "k is out of range"); static constexpr const uint64_t pow10_significands[] = { @@ -297,20 +298,23 @@ template <> struct cache_accessor { bool is_integer; }; - static compute_mul_result compute_mul( - carrier_uint u, const cache_entry_type& cache) 
noexcept { + static auto compute_mul(carrier_uint u, + const cache_entry_type& cache) noexcept + -> compute_mul_result { auto r = umul96_upper64(u, cache); return {static_cast(r >> 32), static_cast(r) == 0}; } - static uint32_t compute_delta(const cache_entry_type& cache, - int beta) noexcept { + static auto compute_delta(const cache_entry_type& cache, int beta) noexcept + -> uint32_t { return static_cast(cache >> (64 - 1 - beta)); } - static compute_mul_parity_result compute_mul_parity( - carrier_uint two_f, const cache_entry_type& cache, int beta) noexcept { + static auto compute_mul_parity(carrier_uint two_f, + const cache_entry_type& cache, + int beta) noexcept + -> compute_mul_parity_result { FMT_ASSERT(beta >= 1, ""); FMT_ASSERT(beta < 64, ""); @@ -319,22 +323,22 @@ template <> struct cache_accessor { static_cast(r >> (32 - beta)) == 0}; } - static carrier_uint compute_left_endpoint_for_shorter_interval_case( - const cache_entry_type& cache, int beta) noexcept { + static auto compute_left_endpoint_for_shorter_interval_case( + const cache_entry_type& cache, int beta) noexcept -> carrier_uint { return static_cast( (cache - (cache >> (num_significand_bits() + 2))) >> (64 - num_significand_bits() - 1 - beta)); } - static carrier_uint compute_right_endpoint_for_shorter_interval_case( - const cache_entry_type& cache, int beta) noexcept { + static auto compute_right_endpoint_for_shorter_interval_case( + const cache_entry_type& cache, int beta) noexcept -> carrier_uint { return static_cast( (cache + (cache >> (num_significand_bits() + 1))) >> (64 - num_significand_bits() - 1 - beta)); } - static carrier_uint compute_round_up_for_shorter_interval_case( - const cache_entry_type& cache, int beta) noexcept { + static auto compute_round_up_for_shorter_interval_case( + const cache_entry_type& cache, int beta) noexcept -> carrier_uint { return (static_cast( cache >> (64 - num_significand_bits() - 2 - beta)) + 1) / @@ -346,7 +350,7 @@ template <> struct cache_accessor { 
using carrier_uint = float_info::carrier_uint; using cache_entry_type = uint128_fallback; - static uint128_fallback get_cached_power(int k) noexcept { + static auto get_cached_power(int k) noexcept -> uint128_fallback { FMT_ASSERT(k >= float_info::min_k && k <= float_info::max_k, "k is out of range"); @@ -985,8 +989,7 @@ template <> struct cache_accessor { {0xe0accfa875af45a7, 0x93eb1b80a33b8606}, {0x8c6c01c9498d8b88, 0xbc72f130660533c4}, {0xaf87023b9bf0ee6a, 0xeb8fad7c7f8680b5}, - { 0xdb68c2ca82ed2a05, - 0xa67398db9f6820e2 } + {0xdb68c2ca82ed2a05, 0xa67398db9f6820e2}, #else {0xff77b1fcbebcdc4f, 0x25e8e89c13bb0f7b}, {0xce5d73ff402d98e3, 0xfb0a3d212dc81290}, @@ -1071,19 +1074,22 @@ template <> struct cache_accessor { bool is_integer; }; - static compute_mul_result compute_mul( - carrier_uint u, const cache_entry_type& cache) noexcept { + static auto compute_mul(carrier_uint u, + const cache_entry_type& cache) noexcept + -> compute_mul_result { auto r = umul192_upper128(u, cache); return {r.high(), r.low() == 0}; } - static uint32_t compute_delta(cache_entry_type const& cache, - int beta) noexcept { + static auto compute_delta(cache_entry_type const& cache, int beta) noexcept + -> uint32_t { return static_cast(cache.high() >> (64 - 1 - beta)); } - static compute_mul_parity_result compute_mul_parity( - carrier_uint two_f, const cache_entry_type& cache, int beta) noexcept { + static auto compute_mul_parity(carrier_uint two_f, + const cache_entry_type& cache, + int beta) noexcept + -> compute_mul_parity_result { FMT_ASSERT(beta >= 1, ""); FMT_ASSERT(beta < 64, ""); @@ -1092,35 +1098,35 @@ template <> struct cache_accessor { ((r.high() << beta) | (r.low() >> (64 - beta))) == 0}; } - static carrier_uint compute_left_endpoint_for_shorter_interval_case( - const cache_entry_type& cache, int beta) noexcept { + static auto compute_left_endpoint_for_shorter_interval_case( + const cache_entry_type& cache, int beta) noexcept -> carrier_uint { return (cache.high() - (cache.high() 
>> (num_significand_bits() + 2))) >> (64 - num_significand_bits() - 1 - beta); } - static carrier_uint compute_right_endpoint_for_shorter_interval_case( - const cache_entry_type& cache, int beta) noexcept { + static auto compute_right_endpoint_for_shorter_interval_case( + const cache_entry_type& cache, int beta) noexcept -> carrier_uint { return (cache.high() + (cache.high() >> (num_significand_bits() + 1))) >> (64 - num_significand_bits() - 1 - beta); } - static carrier_uint compute_round_up_for_shorter_interval_case( - const cache_entry_type& cache, int beta) noexcept { + static auto compute_round_up_for_shorter_interval_case( + const cache_entry_type& cache, int beta) noexcept -> carrier_uint { return ((cache.high() >> (64 - num_significand_bits() - 2 - beta)) + 1) / 2; } }; -FMT_FUNC uint128_fallback get_cached_power(int k) noexcept { +FMT_FUNC auto get_cached_power(int k) noexcept -> uint128_fallback { return cache_accessor::get_cached_power(k); } // Various integer checks template -bool is_left_endpoint_integer_shorter_interval(int exponent) noexcept { +auto is_left_endpoint_integer_shorter_interval(int exponent) noexcept -> bool { const int case_shorter_interval_left_endpoint_lower_threshold = 2; const int case_shorter_interval_left_endpoint_upper_threshold = 3; return exponent >= case_shorter_interval_left_endpoint_lower_threshold && @@ -1132,7 +1138,7 @@ FMT_INLINE int remove_trailing_zeros(uint32_t& n, int s = 0) noexcept { FMT_ASSERT(n != 0, ""); // Modular inverse of 5 (mod 2^32): (mod_inv_5 * 5) mod 2^32 = 1. constexpr uint32_t mod_inv_5 = 0xcccccccd; - constexpr uint32_t mod_inv_25 = 0xc28f5c29; // = mod_inv_5 * mod_inv_5 + constexpr uint32_t mod_inv_25 = 0xc28f5c29; // = mod_inv_5 * mod_inv_5 while (true) { auto q = rotr(n * mod_inv_25, 2); @@ -1168,7 +1174,7 @@ FMT_INLINE int remove_trailing_zeros(uint64_t& n) noexcept { // If n is not divisible by 10^8, work with n itself. 
constexpr uint64_t mod_inv_5 = 0xcccccccccccccccd; - constexpr uint64_t mod_inv_25 = 0x8f5c28f5c28f5c29; // = mod_inv_5 * mod_inv_5 + constexpr uint64_t mod_inv_25 = 0x8f5c28f5c28f5c29; // mod_inv_5 * mod_inv_5 int s = 0; while (true) { @@ -1234,7 +1240,7 @@ FMT_INLINE decimal_fp shorter_interval_case(int exponent) noexcept { return ret_value; } -template decimal_fp to_decimal(T x) noexcept { +template auto to_decimal(T x) noexcept -> decimal_fp { // Step 1: integer promotion & Schubfach multiplier calculation. using carrier_uint = typename float_info::carrier_uint; @@ -1373,15 +1379,15 @@ template <> struct formatter { for (auto i = n.bigits_.size(); i > 0; --i) { auto value = n.bigits_[i - 1u]; if (first) { - out = format_to(out, FMT_STRING("{:x}"), value); + out = fmt::format_to(out, FMT_STRING("{:x}"), value); first = false; continue; } - out = format_to(out, FMT_STRING("{:08x}"), value); + out = fmt::format_to(out, FMT_STRING("{:08x}"), value); } if (n.exp_ > 0) - out = format_to(out, FMT_STRING("p{}"), - n.exp_ * detail::bigint::bigit_bits); + out = fmt::format_to(out, FMT_STRING("p{}"), + n.exp_ * detail::bigint::bigit_bits); return out; } }; @@ -1417,7 +1423,7 @@ FMT_FUNC void report_system_error(int error_code, report_error(format_system_error, error_code, message); } -FMT_FUNC std::string vformat(string_view fmt, format_args args) { +FMT_FUNC auto vformat(string_view fmt, format_args args) -> std::string { // Don't optimize the "{}" case to keep the binary size small and because it // can be better optimized in fmt::format anyway. 
auto buffer = memory_buffer(); @@ -1426,33 +1432,38 @@ FMT_FUNC std::string vformat(string_view fmt, format_args args) { } namespace detail { -#ifndef _WIN32 -FMT_FUNC bool write_console(std::FILE*, string_view) { return false; } +#if !defined(_WIN32) || defined(FMT_WINDOWS_NO_WCHAR) +FMT_FUNC auto write_console(int, string_view) -> bool { return false; } #else using dword = conditional_t; extern "C" __declspec(dllimport) int __stdcall WriteConsoleW( // void*, const void*, dword, dword*, void*); -FMT_FUNC bool write_console(std::FILE* f, string_view text) { - auto fd = _fileno(f); - if (!_isatty(fd)) return false; +FMT_FUNC bool write_console(int fd, string_view text) { auto u16 = utf8_to_utf16(text); - auto written = dword(); return WriteConsoleW(reinterpret_cast(_get_osfhandle(fd)), u16.c_str(), - static_cast(u16.size()), &written, nullptr) != 0; + static_cast(u16.size()), nullptr, nullptr) != 0; } +#endif +#ifdef _WIN32 // Print assuming legacy (non-Unicode) encoding. FMT_FUNC void vprint_mojibake(std::FILE* f, string_view fmt, format_args args) { auto buffer = memory_buffer(); - detail::vformat_to(buffer, fmt, - basic_format_args>(args)); - fwrite_fully(buffer.data(), 1, buffer.size(), f); + detail::vformat_to(buffer, fmt, args); + fwrite_fully(buffer.data(), buffer.size(), f); } #endif FMT_FUNC void print(std::FILE* f, string_view text) { - if (!write_console(f, text)) fwrite_fully(text.data(), 1, text.size(), f); +#ifdef _WIN32 + int fd = _fileno(f); + if (_isatty(fd)) { + std::fflush(f); + if (write_console(fd, text)) return; + } +#endif + fwrite_fully(text.data(), text.size(), f); } } // namespace detail diff --git a/src/fmt/format.h b/src/fmt/format.h index 87a34b972c..8cdf95b7bd 100644 --- a/src/fmt/format.h +++ b/src/fmt/format.h @@ -37,17 +37,28 @@ #include // uint32_t #include // std::memcpy #include // std::initializer_list -#include // std::numeric_limits -#include // std::uninitialized_copy -#include // std::runtime_error -#include // 
std::system_error +#include +#include // std::numeric_limits +#include // std::uninitialized_copy +#include // std::runtime_error +#include // std::system_error #ifdef __cpp_lib_bit_cast -# include // std::bitcast +# include // std::bit_cast #endif #include "core.h" +// libc++ supports string_view in pre-c++17. +#if FMT_HAS_INCLUDE() && \ + (FMT_CPLUSPLUS >= 201703L || defined(_LIBCPP_VERSION)) +# include +# define FMT_USE_STRING_VIEW +#elif FMT_HAS_INCLUDE("experimental/string_view") && FMT_CPLUSPLUS >= 201402L +# include +# define FMT_USE_EXPERIMENTAL_STRING_VIEW +#endif + #if defined __cpp_inline_variables && __cpp_inline_variables >= 201606L # define FMT_INLINE_VARIABLE inline #else @@ -65,25 +76,11 @@ # define FMT_FALLTHROUGH #endif -#ifndef FMT_DEPRECATED -# if FMT_HAS_CPP14_ATTRIBUTE(deprecated) || FMT_MSC_VERSION >= 1900 -# define FMT_DEPRECATED [[deprecated]] -# else -# if (defined(__GNUC__) && !defined(__LCC__)) || defined(__clang__) -# define FMT_DEPRECATED __attribute__((deprecated)) -# elif FMT_MSC_VERSION -# define FMT_DEPRECATED __declspec(deprecated) -# else -# define FMT_DEPRECATED /* deprecated */ -# endif -# endif -#endif - #ifndef FMT_NO_UNIQUE_ADDRESS # if FMT_CPLUSPLUS >= 202002L # if FMT_HAS_CPP_ATTRIBUTE(no_unique_address) # define FMT_NO_UNIQUE_ADDRESS [[no_unique_address]] -// VS2019 v16.10 and later except clang-cl (https://reviews.llvm.org/D110485) +// VS2019 v16.10 and later except clang-cl (https://reviews.llvm.org/D110485). # elif (FMT_MSC_VERSION >= 1929) && !FMT_CLANG_VERSION # define FMT_NO_UNIQUE_ADDRESS [[msvc::no_unique_address]] # endif @@ -93,10 +90,11 @@ # define FMT_NO_UNIQUE_ADDRESS #endif -#if FMT_GCC_VERSION || defined(__clang__) -# define FMT_VISIBILITY(value) __attribute__((visibility(value))) +// Visibility when compiled as a shared library/object. 
+#if defined(FMT_LIB_EXPORT) || defined(FMT_SHARED) +# define FMT_SO_VISIBILITY(value) FMT_VISIBILITY(value) #else -# define FMT_VISIBILITY(value) +# define FMT_SO_VISIBILITY(value) #endif #ifdef __has_builtin @@ -152,7 +150,10 @@ FMT_END_NAMESPACE #ifndef FMT_USE_USER_DEFINED_LITERALS // EDG based compilers (Intel, NVIDIA, Elbrus, etc), GCC and MSVC support UDLs. -# if (FMT_HAS_FEATURE(cxx_user_literals) || FMT_GCC_VERSION >= 407 || \ +// +// GCC before 4.9 requires a space in `operator"" _a` which is invalid in later +// compiler versions. +# if (FMT_HAS_FEATURE(cxx_user_literals) || FMT_GCC_VERSION >= 409 || \ FMT_MSC_VERSION >= 1900) && \ (!defined(__EDG_VERSION__) || __EDG_VERSION__ >= /* UDL feature */ 480) # define FMT_USE_USER_DEFINED_LITERALS 1 @@ -272,20 +273,19 @@ inline auto ctzll(uint64_t x) -> int { FMT_END_NAMESPACE #endif +namespace std { +template <> struct iterator_traits { + using value_type = void; + using iterator_category = std::output_iterator_tag; +}; +template +struct iterator_traits> { + using value_type = void; + using iterator_category = std::output_iterator_tag; +}; +} // namespace std + FMT_BEGIN_NAMESPACE - -template struct disjunction : std::false_type {}; -template struct disjunction

: P {}; -template -struct disjunction - : conditional_t> {}; - -template struct conjunction : std::true_type {}; -template struct conjunction

: P {}; -template -struct conjunction - : conditional_t, P1> {}; - namespace detail { FMT_CONSTEXPR inline void abort_fuzzing_if(bool condition) { @@ -295,6 +295,15 @@ FMT_CONSTEXPR inline void abort_fuzzing_if(bool condition) { #endif } +#if defined(FMT_USE_STRING_VIEW) +template using std_string_view = std::basic_string_view; +#elif defined(FMT_USE_EXPERIMENTAL_STRING_VIEW) +template +using std_string_view = std::experimental::basic_string_view; +#else +template struct std_string_view {}; +#endif + template struct string_literal { static constexpr CharT value[sizeof...(C)] = {C...}; constexpr operator basic_string_view() const { @@ -307,37 +316,6 @@ template constexpr CharT string_literal::value[sizeof...(C)]; #endif -template class formatbuf : public Streambuf { - private: - using char_type = typename Streambuf::char_type; - using streamsize = decltype(std::declval().sputn(nullptr, 0)); - using int_type = typename Streambuf::int_type; - using traits_type = typename Streambuf::traits_type; - - buffer& buffer_; - - public: - explicit formatbuf(buffer& buf) : buffer_(buf) {} - - protected: - // The put area is always empty. This makes the implementation simpler and has - // the advantage that the streambuf and the buffer are always in sync and - // sputc never writes into uninitialized memory. A disadvantage is that each - // call to sputc always results in a (virtual) call to overflow. There is no - // disadvantage here for sputn since this always results in a call to xsputn. - - auto overflow(int_type ch) -> int_type override { - if (!traits_type::eq_int_type(ch, traits_type::eof())) - buffer_.push_back(static_cast(ch)); - return ch; - } - - auto xsputn(const char_type* s, streamsize count) -> streamsize override { - buffer_.append(s, s + count); - return count; - } -}; - // Implementation of std::bit_cast for pre-C++20. 
template FMT_CONSTEXPR20 auto bit_cast(const From& from) -> To { @@ -373,8 +351,8 @@ class uint128_fallback { constexpr uint128_fallback(uint64_t hi, uint64_t lo) : lo_(lo), hi_(hi) {} constexpr uint128_fallback(uint64_t value = 0) : lo_(value), hi_(0) {} - constexpr uint64_t high() const noexcept { return hi_; } - constexpr uint64_t low() const noexcept { return lo_; } + constexpr auto high() const noexcept -> uint64_t { return hi_; } + constexpr auto low() const noexcept -> uint64_t { return lo_; } template ::value)> constexpr explicit operator T() const { @@ -450,7 +428,7 @@ class uint128_fallback { hi_ &= n.hi_; } - FMT_CONSTEXPR20 uint128_fallback& operator+=(uint64_t n) noexcept { + FMT_CONSTEXPR20 auto operator+=(uint64_t n) noexcept -> uint128_fallback& { if (is_constant_evaluated()) { lo_ += n; hi_ += (lo_ < n ? 1 : 0); @@ -546,6 +524,52 @@ FMT_INLINE void assume(bool condition) { #endif } +// Extracts a reference to the container from back_insert_iterator. +template +inline auto get_container(std::back_insert_iterator it) + -> Container& { + using base = std::back_insert_iterator; + struct accessor : base { + accessor(base b) : base(b) {} + using base::container; + }; + return *accessor(it).container; +} + +template +FMT_CONSTEXPR auto copy_str(InputIt begin, InputIt end, OutputIt out) + -> OutputIt { + while (begin != end) *out++ = static_cast(*begin++); + return out; +} + +template , U>::value&& is_char::value)> +FMT_CONSTEXPR auto copy_str(T* begin, T* end, U* out) -> U* { + if (is_constant_evaluated()) return copy_str(begin, end, out); + auto size = to_unsigned(end - begin); + if (size > 0) memcpy(out, begin, size * sizeof(U)); + return out + size; +} + +template +auto copy_str(InputIt begin, InputIt end, appender out) -> appender { + get_container(out).append(begin, end); + return out; +} +template +auto copy_str(InputIt begin, InputIt end, back_insert_iterator out) + -> back_insert_iterator { + get_container(out).append(begin, end); + return out; +} 
+ +template +FMT_CONSTEXPR auto copy_str(R&& rng, OutputIt out) -> OutputIt { + return detail::copy_str(rng.begin(), rng.end(), out); +} + // An approximation of iterator_t for pre-C++20 systems. template using iterator_t = decltype(std::begin(std::declval())); @@ -740,7 +764,7 @@ inline auto compute_width(basic_string_view s) -> size_t { } // Computes approximate display width of a UTF-8 string. -FMT_CONSTEXPR inline size_t compute_width(string_view s) { +FMT_CONSTEXPR inline auto compute_width(string_view s) -> size_t { size_t num_code_points = 0; // It is not a lambda for compatibility with C++14. struct count_code_points { @@ -787,12 +811,17 @@ inline auto code_point_index(basic_string_view s, size_t n) -> size_t { // Calculates the index of the nth code point in a UTF-8 string. inline auto code_point_index(string_view s, size_t n) -> size_t { - const char* data = s.data(); - size_t num_code_points = 0; - for (size_t i = 0, size = s.size(); i != size; ++i) { - if ((data[i] & 0xc0) != 0x80 && ++num_code_points > n) return i; - } - return s.size(); + size_t result = s.size(); + const char* begin = s.begin(); + for_each_codepoint(s, [begin, &n, &result](uint32_t, string_view sv) { + if (n != 0) { + --n; + return true; + } + result = to_unsigned(sv.begin() - begin); + return false; + }); + return result; } inline auto code_point_index(basic_string_view s, size_t n) @@ -902,7 +931,7 @@ enum { inline_buffer_size = 500 }; **Example**:: auto out = fmt::memory_buffer(); - format_to(std::back_inserter(out), "The answer is {}.", 42); + fmt::format_to(std::back_inserter(out), "The answer is {}.", 42); This will append the following output to the ``out`` object: @@ -929,27 +958,29 @@ class basic_memory_buffer final : public detail::buffer { } protected: - FMT_CONSTEXPR20 void grow(size_t size) override { + static FMT_CONSTEXPR20 void grow(detail::buffer& buf, size_t size) { detail::abort_fuzzing_if(size > 5000); - const size_t max_size = 
std::allocator_traits::max_size(alloc_); - size_t old_capacity = this->capacity(); + auto& self = static_cast(buf); + const size_t max_size = + std::allocator_traits::max_size(self.alloc_); + size_t old_capacity = buf.capacity(); size_t new_capacity = old_capacity + old_capacity / 2; if (size > new_capacity) new_capacity = size; else if (new_capacity > max_size) new_capacity = size > max_size ? size : max_size; - T* old_data = this->data(); + T* old_data = buf.data(); T* new_data = - std::allocator_traits::allocate(alloc_, new_capacity); + std::allocator_traits::allocate(self.alloc_, new_capacity); // Suppress a bogus -Wstringop-overflow in gcc 13.1 (#3481). - detail::assume(this->size() <= new_capacity); + detail::assume(buf.size() <= new_capacity); // The following code doesn't throw, so the raw pointer above doesn't leak. - std::uninitialized_copy_n(old_data, this->size(), new_data); - this->set(new_data, new_capacity); + std::uninitialized_copy_n(old_data, buf.size(), new_data); + self.set(new_data, new_capacity); // deallocate must not throw according to the standard, but even if it does, // the buffer already uses the new storage and will deallocate it in // destructor. - if (old_data != store_) alloc_.deallocate(old_data, old_capacity); + if (old_data != self.store_) self.alloc_.deallocate(old_data, old_capacity); } public: @@ -958,7 +989,7 @@ class basic_memory_buffer final : public detail::buffer { FMT_CONSTEXPR20 explicit basic_memory_buffer( const Allocator& alloc = Allocator()) - : alloc_(alloc) { + : detail::buffer(grow), alloc_(alloc) { this->set(store_, SIZE); if (detail::is_constant_evaluated()) detail::fill_n(store_, SIZE, T()); } @@ -990,7 +1021,8 @@ class basic_memory_buffer final : public detail::buffer { of the other object to it. 
\endrst */ - FMT_CONSTEXPR20 basic_memory_buffer(basic_memory_buffer&& other) noexcept { + FMT_CONSTEXPR20 basic_memory_buffer(basic_memory_buffer&& other) noexcept + : detail::buffer(grow) { move(other); } @@ -1018,7 +1050,6 @@ class basic_memory_buffer final : public detail::buffer { /** Increases the buffer capacity to *new_capacity*. */ void reserve(size_t new_capacity) { this->try_reserve(new_capacity); } - // Directly append data into the buffer using detail::buffer::append; template void append(const ContiguousRange& range) { @@ -1034,7 +1065,7 @@ struct is_contiguous> : std::true_type { FMT_END_EXPORT namespace detail { -FMT_API bool write_console(std::FILE* f, string_view text); +FMT_API auto write_console(int fd, string_view text) -> bool; FMT_API void print(std::FILE*, string_view); } // namespace detail @@ -1046,7 +1077,7 @@ FMT_BEGIN_EXPORT #endif /** An error reported from a formatting function. */ -class FMT_VISIBILITY("default") format_error : public std::runtime_error { +class FMT_SO_VISIBILITY("default") format_error : public std::runtime_error { public: using std::runtime_error::runtime_error; }; @@ -1089,7 +1120,7 @@ class loc_value { loc_value(T) {} template auto visit(Visitor&& vis) -> decltype(vis(0)) { - return visit_format_arg(vis, value_); + return value_.visit(vis); } }; @@ -1153,13 +1184,13 @@ using uint32_or_64_or_128_t = template using uint64_or_128_t = conditional_t() <= 64, uint64_t, uint128_t>; -#define FMT_POWERS_OF_10(factor) \ - factor * 10, (factor)*100, (factor)*1000, (factor)*10000, (factor)*100000, \ - (factor)*1000000, (factor)*10000000, (factor)*100000000, \ - (factor)*1000000000 +#define FMT_POWERS_OF_10(factor) \ + factor * 10, (factor) * 100, (factor) * 1000, (factor) * 10000, \ + (factor) * 100000, (factor) * 1000000, (factor) * 10000000, \ + (factor) * 100000000, (factor) * 1000000000 // Converts value in the range [0, 100) to a string. 
-constexpr const char* digits2(size_t value) { +constexpr auto digits2(size_t value) -> const char* { // GCC generates slightly better code when value is pointer-size. return &"0001020304050607080910111213141516171819" "2021222324252627282930313233343536373839" @@ -1169,7 +1200,7 @@ constexpr const char* digits2(size_t value) { } // Sign is a template parameter to workaround a bug in gcc 4.8. -template constexpr Char sign(Sign s) { +template constexpr auto sign(Sign s) -> Char { #if !FMT_GCC_VERSION || FMT_GCC_VERSION >= 604 static_assert(std::is_same::value, ""); #endif @@ -1394,7 +1425,7 @@ FMT_CONSTEXPR inline auto format_uint(It out, UInt value, int num_digits, return out; } // Buffer should be large enough to hold all digits (digits / BASE_BITS + 1). - char buffer[num_bits() / BASE_BITS + 1]; + char buffer[num_bits() / BASE_BITS + 1] = {}; format_uint(buffer, value, num_digits, upper); return detail::copy_str_noinline(buffer, buffer + num_digits, out); } @@ -1430,22 +1461,23 @@ template class to_utf8 { : "invalid utf32")); } operator string_view() const { return string_view(&buffer_[0], size()); } - size_t size() const { return buffer_.size() - 1; } - const char* c_str() const { return &buffer_[0]; } - std::string str() const { return std::string(&buffer_[0], size()); } + auto size() const -> size_t { return buffer_.size() - 1; } + auto c_str() const -> const char* { return &buffer_[0]; } + auto str() const -> std::string { return std::string(&buffer_[0], size()); } // Performs conversion returning a bool instead of throwing exception on // conversion error. This method may still throw in case of memory allocation // error. 
- bool convert(basic_string_view s, - to_utf8_error_policy policy = to_utf8_error_policy::abort) { + auto convert(basic_string_view s, + to_utf8_error_policy policy = to_utf8_error_policy::abort) + -> bool { if (!convert(buffer_, s, policy)) return false; buffer_.push_back(0); return true; } - static bool convert( - Buffer& buf, basic_string_view s, - to_utf8_error_policy policy = to_utf8_error_policy::abort) { + static auto convert(Buffer& buf, basic_string_view s, + to_utf8_error_policy policy = to_utf8_error_policy::abort) + -> bool { for (auto p = s.begin(); p != s.end(); ++p) { uint32_t c = static_cast(*p); if (sizeof(WChar) == 2 && c >= 0xd800 && c <= 0xdfff) { @@ -1481,7 +1513,7 @@ template class to_utf8 { }; // Computes 128-bit result of multiplication of two 64-bit unsigned integers. -inline uint128_fallback umul128(uint64_t x, uint64_t y) noexcept { +inline auto umul128(uint64_t x, uint64_t y) noexcept -> uint128_fallback { #if FMT_USE_INT128 auto p = static_cast(x) * static_cast(y); return {static_cast(p >> 64), static_cast(p)}; @@ -1512,19 +1544,19 @@ inline uint128_fallback umul128(uint64_t x, uint64_t y) noexcept { namespace dragonbox { // Computes floor(log10(pow(2, e))) for e in [-2620, 2620] using the method from // https://fmt.dev/papers/Dragonbox.pdf#page=28, section 6.1. -inline int floor_log10_pow2(int e) noexcept { +inline auto floor_log10_pow2(int e) noexcept -> int { FMT_ASSERT(e <= 2620 && e >= -2620, "too large exponent"); static_assert((-1 >> 1) == -1, "right shift is not arithmetic"); return (e * 315653) >> 20; } -inline int floor_log2_pow10(int e) noexcept { +inline auto floor_log2_pow10(int e) noexcept -> int { FMT_ASSERT(e <= 1233 && e >= -1233, "too large exponent"); return (e * 1741647) >> 19; } // Computes upper 64 bits of multiplication of two 64-bit unsigned integers. 
-inline uint64_t umul128_upper64(uint64_t x, uint64_t y) noexcept { +inline auto umul128_upper64(uint64_t x, uint64_t y) noexcept -> uint64_t { #if FMT_USE_INT128 auto p = static_cast(x) * static_cast(y); return static_cast(p >> 64); @@ -1537,14 +1569,14 @@ inline uint64_t umul128_upper64(uint64_t x, uint64_t y) noexcept { // Computes upper 128 bits of multiplication of a 64-bit unsigned integer and a // 128-bit unsigned integer. -inline uint128_fallback umul192_upper128(uint64_t x, - uint128_fallback y) noexcept { +inline auto umul192_upper128(uint64_t x, uint128_fallback y) noexcept + -> uint128_fallback { uint128_fallback r = umul128(x, y.high()); r += umul128_upper64(x, y.low()); return r; } -FMT_API uint128_fallback get_cached_power(int k) noexcept; +FMT_API auto get_cached_power(int k) noexcept -> uint128_fallback; // Type-specific information that Dragonbox uses. template struct float_info; @@ -1598,14 +1630,14 @@ template FMT_API auto to_decimal(T x) noexcept -> decimal_fp; } // namespace dragonbox // Returns true iff Float has the implicit bit which is not stored. -template constexpr bool has_implicit_bit() { +template constexpr auto has_implicit_bit() -> bool { // An 80-bit FP number has a 64-bit significand an no implicit bit. return std::numeric_limits::digits != 64; } // Returns the number of significand bits stored in Float. The implicit bit is // not counted since it is not stored. -template constexpr int num_significand_bits() { +template constexpr auto num_significand_bits() -> int { // std::numeric_limits may not support __float128. return is_float128() ? 112 : (std::numeric_limits::digits - @@ -1698,7 +1730,7 @@ using fp = basic_fp; // Normalizes the value converted from double and multiplied by (1 << SHIFT). template -FMT_CONSTEXPR basic_fp normalize(basic_fp value) { +FMT_CONSTEXPR auto normalize(basic_fp value) -> basic_fp { // Handle subnormals. 
const auto implicit_bit = F(1) << num_significand_bits(); const auto shifted_implicit_bit = implicit_bit << SHIFT; @@ -1715,7 +1747,7 @@ FMT_CONSTEXPR basic_fp normalize(basic_fp value) { } // Computes lhs * rhs / pow(2, 64) rounded to nearest with half-up tie breaking. -FMT_CONSTEXPR inline uint64_t multiply(uint64_t lhs, uint64_t rhs) { +FMT_CONSTEXPR inline auto multiply(uint64_t lhs, uint64_t rhs) -> uint64_t { #if FMT_USE_INT128 auto product = static_cast<__uint128_t>(lhs) * rhs; auto f = static_cast(product >> 64); @@ -1732,33 +1764,10 @@ FMT_CONSTEXPR inline uint64_t multiply(uint64_t lhs, uint64_t rhs) { #endif } -FMT_CONSTEXPR inline fp operator*(fp x, fp y) { +FMT_CONSTEXPR inline auto operator*(fp x, fp y) -> fp { return {multiply(x.f, y.f), x.e + y.e + 64}; } -template struct basic_data { - // For checking rounding thresholds. - // The kth entry is chosen to be the smallest integer such that the - // upper 32-bits of 10^(k+1) times it is strictly bigger than 5 * 10^k. - static constexpr uint32_t fractional_part_rounding_thresholds[8] = { - 2576980378U, // ceil(2^31 + 2^32/10^1) - 2190433321U, // ceil(2^31 + 2^32/10^2) - 2151778616U, // ceil(2^31 + 2^32/10^3) - 2147913145U, // ceil(2^31 + 2^32/10^4) - 2147526598U, // ceil(2^31 + 2^32/10^5) - 2147487943U, // ceil(2^31 + 2^32/10^6) - 2147484078U, // ceil(2^31 + 2^32/10^7) - 2147483691U // ceil(2^31 + 2^32/10^8) - }; -}; -// This is a struct rather than an alias to avoid shadowing warnings in gcc. 
-struct data : basic_data<> {}; - -#if FMT_CPLUSPLUS < 201703L -template -constexpr uint32_t basic_data::fractional_part_rounding_thresholds[]; -#endif - template () == num_bits()> using convert_float_result = conditional_t::value || doublish, double, T>; @@ -1939,15 +1948,11 @@ auto write_escaped_cp(OutputIt out, const find_escape_result& escape) *out++ = static_cast('\\'); break; default: - if (escape.cp < 0x100) { - return write_codepoint<2, Char>(out, 'x', escape.cp); - } - if (escape.cp < 0x10000) { + if (escape.cp < 0x100) return write_codepoint<2, Char>(out, 'x', escape.cp); + if (escape.cp < 0x10000) return write_codepoint<4, Char>(out, 'u', escape.cp); - } - if (escape.cp < 0x110000) { + if (escape.cp < 0x110000) return write_codepoint<8, Char>(out, 'U', escape.cp); - } for (Char escape_char : basic_string_view( escape.begin, to_unsigned(escape.end - escape.begin))) { out = write_codepoint<2, Char>(out, 'x', @@ -1977,11 +1982,13 @@ auto write_escaped_string(OutputIt out, basic_string_view str) template auto write_escaped_char(OutputIt out, Char v) -> OutputIt { + Char v_array[1] = {v}; *out++ = static_cast('\''); if ((needs_escape(static_cast(v)) && v != static_cast('"')) || v == static_cast('\'')) { - out = write_escaped_cp( - out, find_escape_result{&v, &v + 1, static_cast(v)}); + out = write_escaped_cp(out, + find_escape_result{v_array, v_array + 1, + static_cast(v)}); } else { *out++ = v; } @@ -2070,10 +2077,10 @@ template class digit_grouping { std::string::const_iterator group; int pos; }; - next_state initial_state() const { return {grouping_.begin(), 0}; } + auto initial_state() const -> next_state { return {grouping_.begin(), 0}; } // Returns the next digit group separator position. 
- int next(next_state& state) const { + auto next(next_state& state) const -> int { if (thousands_sep_.empty()) return max_value(); if (state.group == grouping_.end()) return state.pos += grouping_.back(); if (*state.group <= 0 || *state.group == max_value()) @@ -2092,9 +2099,9 @@ template class digit_grouping { digit_grouping(std::string grouping, std::basic_string sep) : grouping_(std::move(grouping)), thousands_sep_(std::move(sep)) {} - bool has_separator() const { return !thousands_sep_.empty(); } + auto has_separator() const -> bool { return !thousands_sep_.empty(); } - int count_separators(int num_digits) const { + auto count_separators(int num_digits) const -> int { int count = 0; auto state = initial_state(); while (num_digits > next(state)) ++count; @@ -2103,7 +2110,7 @@ template class digit_grouping { // Applies grouping to digits and write the output to out. template - Out apply(Out out, basic_string_view digits) const { + auto apply(Out out, basic_string_view digits) const -> Out { auto num_digits = static_cast(digits.size()); auto separators = basic_memory_buffer(); separators.push_back(0); @@ -2126,24 +2133,66 @@ template class digit_grouping { } }; +FMT_CONSTEXPR inline void prefix_append(unsigned& prefix, unsigned value) { + prefix |= prefix != 0 ? value << 8 : value; + prefix += (1u + (value > 0xff ? 1 : 0)) << 24; +} + // Writes a decimal integer with digit grouping. template auto write_int(OutputIt out, UInt value, unsigned prefix, const format_specs& specs, const digit_grouping& grouping) -> OutputIt { static_assert(std::is_same, UInt>::value, ""); - int num_digits = count_digits(value); - char digits[40]; - format_decimal(digits, value, num_digits); - unsigned size = to_unsigned((prefix != 0 ? 
1 : 0) + num_digits + - grouping.count_separators(num_digits)); + int num_digits = 0; + auto buffer = memory_buffer(); + switch (specs.type) { + case presentation_type::none: + case presentation_type::dec: { + num_digits = count_digits(value); + format_decimal(appender(buffer), value, num_digits); + break; + } + case presentation_type::hex_lower: + case presentation_type::hex_upper: { + bool upper = specs.type == presentation_type::hex_upper; + if (specs.alt) + prefix_append(prefix, unsigned(upper ? 'X' : 'x') << 8 | '0'); + num_digits = count_digits<4>(value); + format_uint<4, char>(appender(buffer), value, num_digits, upper); + break; + } + case presentation_type::bin_lower: + case presentation_type::bin_upper: { + bool upper = specs.type == presentation_type::bin_upper; + if (specs.alt) + prefix_append(prefix, unsigned(upper ? 'B' : 'b') << 8 | '0'); + num_digits = count_digits<1>(value); + format_uint<1, char>(appender(buffer), value, num_digits); + break; + } + case presentation_type::oct: { + num_digits = count_digits<3>(value); + // Octal prefix '0' is counted as a digit, so only add it if precision + // is not greater than the number of digits. + if (specs.alt && specs.precision <= num_digits && value != 0) + prefix_append(prefix, '0'); + format_uint<3, char>(appender(buffer), value, num_digits); + break; + } + case presentation_type::chr: + return write_char(out, static_cast(value), specs); + default: + throw_format_error("invalid format specifier"); + } + + unsigned size = (prefix != 0 ? 
prefix >> 24 : 0) + to_unsigned(num_digits) + + to_unsigned(grouping.count_separators(num_digits)); return write_padded( out, specs, size, size, [&](reserve_iterator it) { - if (prefix != 0) { - char sign = static_cast(prefix); - *it++ = static_cast(sign); - } - return grouping.apply(it, string_view(digits, to_unsigned(num_digits))); + for (unsigned p = prefix & 0xffffff; p != 0; p >>= 8) + *it++ = static_cast(p & 0xff); + return grouping.apply(it, string_view(buffer.data(), buffer.size())); }); } @@ -2156,11 +2205,6 @@ inline auto write_loc(OutputIt, loc_value, const format_specs&, return false; } -FMT_CONSTEXPR inline void prefix_append(unsigned& prefix, unsigned value) { - prefix |= prefix != 0 ? value << 8 : value; - prefix += (1u + (value > 0xff ? 1 : 0)) << 24; -} - template struct write_int_arg { UInt abs_value; unsigned prefix; @@ -2307,25 +2351,25 @@ class counting_iterator { FMT_CONSTEXPR counting_iterator() : count_(0) {} - FMT_CONSTEXPR size_t count() const { return count_; } + FMT_CONSTEXPR auto count() const -> size_t { return count_; } - FMT_CONSTEXPR counting_iterator& operator++() { + FMT_CONSTEXPR auto operator++() -> counting_iterator& { ++count_; return *this; } - FMT_CONSTEXPR counting_iterator operator++(int) { + FMT_CONSTEXPR auto operator++(int) -> counting_iterator { auto it = *this; ++*this; return it; } - FMT_CONSTEXPR friend counting_iterator operator+(counting_iterator it, - difference_type n) { + FMT_CONSTEXPR friend auto operator+(counting_iterator it, difference_type n) + -> counting_iterator { it.count_ += static_cast(n); return it; } - FMT_CONSTEXPR value_type operator*() const { return {}; } + FMT_CONSTEXPR auto operator*() const -> value_type { return {}; } }; template @@ -2360,9 +2404,10 @@ template FMT_CONSTEXPR auto write(OutputIt out, const Char* s, const format_specs& specs, locale_ref) -> OutputIt { - return specs.type != presentation_type::pointer - ? 
write(out, basic_string_view(s), specs, {}) - : write_ptr(out, bit_cast(s), &specs); + if (specs.type == presentation_type::pointer) + return write_ptr(out, bit_cast(s), &specs); + if (!s) throw_format_error("string pointer is null"); + return write(out, basic_string_view(s), specs, {}); } template -FMT_CONSTEXPR auto parse_float_type_spec(const format_specs& specs, - ErrorHandler&& eh = {}) +template +FMT_CONSTEXPR auto parse_float_type_spec(const format_specs& specs) -> float_specs { auto result = float_specs(); result.showpoint = specs.alt; @@ -2486,7 +2530,7 @@ FMT_CONSTEXPR auto parse_float_type_spec(const format_specs& specs, result.format = float_format::hex; break; default: - eh.on_error("invalid format specifier"); + throw_format_error("invalid format specifier"); break; } return result; @@ -2725,12 +2769,12 @@ template class fallback_digit_grouping { public: constexpr fallback_digit_grouping(locale_ref, bool) {} - constexpr bool has_separator() const { return false; } + constexpr auto has_separator() const -> bool { return false; } - constexpr int count_separators(int) const { return 0; } + constexpr auto count_separators(int) const -> int { return 0; } template - constexpr Out apply(Out out, basic_string_view) const { + constexpr auto apply(Out out, basic_string_view) const -> Out { return out; } }; @@ -2749,7 +2793,7 @@ FMT_CONSTEXPR20 auto write_float(OutputIt out, const DecimalFP& f, } } -template constexpr bool isnan(T value) { +template constexpr auto isnan(T value) -> bool { return !(value >= value); // std::isnan doesn't support __float128. 
} @@ -2762,14 +2806,14 @@ struct has_isfinite> template ::value&& has_isfinite::value)> -FMT_CONSTEXPR20 bool isfinite(T value) { +FMT_CONSTEXPR20 auto isfinite(T value) -> bool { constexpr T inf = T(std::numeric_limits::infinity()); if (is_constant_evaluated()) return !detail::isnan(value) && value < inf && value > -inf; return std::isfinite(value); } template ::value)> -FMT_CONSTEXPR bool isfinite(T value) { +FMT_CONSTEXPR auto isfinite(T value) -> bool { T inf = T(std::numeric_limits::infinity()); // std::isfinite doesn't support __float128. return !detail::isnan(value) && value < inf && value > -inf; @@ -2806,10 +2850,10 @@ class bigint { basic_memory_buffer bigits_; int exp_; - FMT_CONSTEXPR20 bigit operator[](int index) const { + FMT_CONSTEXPR20 auto operator[](int index) const -> bigit { return bigits_[to_unsigned(index)]; } - FMT_CONSTEXPR20 bigit& operator[](int index) { + FMT_CONSTEXPR20 auto operator[](int index) -> bigit& { return bigits_[to_unsigned(index)]; } @@ -2905,11 +2949,11 @@ class bigint { assign(uint64_or_128_t(n)); } - FMT_CONSTEXPR20 int num_bigits() const { + FMT_CONSTEXPR20 auto num_bigits() const -> int { return static_cast(bigits_.size()) + exp_; } - FMT_NOINLINE FMT_CONSTEXPR20 bigint& operator<<=(int shift) { + FMT_NOINLINE FMT_CONSTEXPR20 auto operator<<=(int shift) -> bigint& { FMT_ASSERT(shift >= 0, ""); exp_ += shift / bigit_bits; shift %= bigit_bits; @@ -2924,13 +2968,15 @@ class bigint { return *this; } - template FMT_CONSTEXPR20 bigint& operator*=(Int value) { + template + FMT_CONSTEXPR20 auto operator*=(Int value) -> bigint& { FMT_ASSERT(value > 0, ""); multiply(uint32_or_64_or_128_t(value)); return *this; } - friend FMT_CONSTEXPR20 int compare(const bigint& lhs, const bigint& rhs) { + friend FMT_CONSTEXPR20 auto compare(const bigint& lhs, const bigint& rhs) + -> int { int num_lhs_bigits = lhs.num_bigits(), num_rhs_bigits = rhs.num_bigits(); if (num_lhs_bigits != num_rhs_bigits) return num_lhs_bigits > num_rhs_bigits ? 
1 : -1; @@ -2947,8 +2993,9 @@ class bigint { } // Returns compare(lhs1 + lhs2, rhs). - friend FMT_CONSTEXPR20 int add_compare(const bigint& lhs1, const bigint& lhs2, - const bigint& rhs) { + friend FMT_CONSTEXPR20 auto add_compare(const bigint& lhs1, + const bigint& lhs2, const bigint& rhs) + -> int { auto minimum = [](int a, int b) { return a < b ? a : b; }; auto maximum = [](int a, int b) { return a > b ? a : b; }; int max_lhs_bigits = maximum(lhs1.num_bigits(), lhs2.num_bigits()); @@ -3029,13 +3076,13 @@ class bigint { bigits_.resize(to_unsigned(num_bigits + exp_difference)); for (int i = num_bigits - 1, j = i + exp_difference; i >= 0; --i, --j) bigits_[j] = bigits_[i]; - std::uninitialized_fill_n(bigits_.data(), exp_difference, 0); + std::uninitialized_fill_n(bigits_.data(), exp_difference, 0u); exp_ -= exp_difference; } // Divides this bignum by divisor, assigning the remainder to this and // returning the quotient. - FMT_CONSTEXPR20 int divmod_assign(const bigint& divisor) { + FMT_CONSTEXPR20 auto divmod_assign(const bigint& divisor) -> int { FMT_ASSERT(this != &divisor, ""); if (compare(*this, divisor) < 0) return 0; FMT_ASSERT(divisor.bigits_[divisor.bigits_.size() - 1u] != 0, ""); @@ -3178,8 +3225,10 @@ FMT_CONSTEXPR20 inline void format_dragon(basic_fp value, } if (buf[0] == overflow) { buf[0] = '1'; - if ((flags & dragon::fixed) != 0) buf.push_back('0'); - else ++exp10; + if ((flags & dragon::fixed) != 0) + buf.push_back('0'); + else + ++exp10; } return; } @@ -3276,6 +3325,17 @@ FMT_CONSTEXPR20 void format_hexfloat(Float value, int precision, format_hexfloat(static_cast(value), precision, specs, buf); } +constexpr auto fractional_part_rounding_thresholds(int index) -> uint32_t { + // For checking rounding thresholds. + // The kth entry is chosen to be the smallest integer such that the + // upper 32-bits of 10^(k+1) times it is strictly bigger than 5 * 10^k. + // It is equal to ceil(2^31 + 2^32/10^(k + 1)). 
+ // These are stored in a string literal because we cannot have static arrays + // in constexpr functions and non-static ones are poorly optimized. + return U"\x9999999a\x828f5c29\x80418938\x80068db9\x8000a7c6\x800010c7" + U"\x800001ae\x8000002b"[index]; +} + template FMT_CONSTEXPR20 auto format_float(Float value, int precision, float_specs specs, buffer& buf) -> int { @@ -3480,12 +3540,12 @@ FMT_CONSTEXPR20 auto format_float(Float value, int precision, float_specs specs, // fractional part is strictly larger than 1/2. if (precision < 9) { uint32_t fractional_part = static_cast(prod); - should_round_up = fractional_part >= - data::fractional_part_rounding_thresholds - [8 - number_of_digits_to_print] || - ((fractional_part >> 31) & - ((digits & 1) | (second_third_subsegments != 0) | - has_more_segments)) != 0; + should_round_up = + fractional_part >= fractional_part_rounding_thresholds( + 8 - number_of_digits_to_print) || + ((fractional_part >> 31) & + ((digits & 1) | (second_third_subsegments != 0) | + has_more_segments)) != 0; } // Rounding at the subsegment boundary. // In this case, the fractional part is at least 1/2 if and only if @@ -3520,12 +3580,12 @@ FMT_CONSTEXPR20 auto format_float(Float value, int precision, float_specs specs, // of 19 digits, so in this case the third segment should be // consisting of a genuine digit from the input. uint32_t fractional_part = static_cast(prod); - should_round_up = fractional_part >= - data::fractional_part_rounding_thresholds - [8 - number_of_digits_to_print] || - ((fractional_part >> 31) & - ((digits & 1) | (third_subsegment != 0) | - has_more_segments)) != 0; + should_round_up = + fractional_part >= fractional_part_rounding_thresholds( + 8 - number_of_digits_to_print) || + ((fractional_part >> 31) & + ((digits & 1) | (third_subsegment != 0) | + has_more_segments)) != 0; } // Rounding at the subsegment boundary. 
else { @@ -3726,8 +3786,7 @@ FMT_CONSTEXPR auto write(OutputIt out, Char value) -> OutputIt { } template -FMT_CONSTEXPR_CHAR_TRAITS auto write(OutputIt out, const Char* value) - -> OutputIt { +FMT_CONSTEXPR20 auto write(OutputIt out, const Char* value) -> OutputIt { if (value) return write(out, basic_string_view(value)); throw_format_error("string pointer is null"); return out; @@ -3757,8 +3816,11 @@ template enable_if_t::value == type::custom_type, OutputIt> { + auto formatter = typename Context::template formatter_type(); + auto parse_ctx = typename Context::parse_context_type({}); + formatter.parse(parse_ctx); auto ctx = Context(out, {}, {}); - return typename Context::template formatter_type().format(value, ctx); + return formatter.format(value, ctx); } // An argument visitor that formats the argument and writes it via the output @@ -3801,62 +3863,39 @@ template struct arg_formatter { } }; -template struct custom_formatter { - basic_format_parse_context& parse_ctx; - buffer_context& ctx; - - void operator()( - typename basic_format_arg>::handle h) const { - h.format(parse_ctx, ctx); - } - template void operator()(T) const {} -}; - -template class width_checker { - public: - explicit FMT_CONSTEXPR width_checker(ErrorHandler& eh) : handler_(eh) {} - +struct width_checker { template ::value)> FMT_CONSTEXPR auto operator()(T value) -> unsigned long long { - if (is_negative(value)) handler_.on_error("negative width"); + if (is_negative(value)) throw_format_error("negative width"); return static_cast(value); } template ::value)> FMT_CONSTEXPR auto operator()(T) -> unsigned long long { - handler_.on_error("width is not integer"); + throw_format_error("width is not integer"); return 0; } - - private: - ErrorHandler& handler_; }; -template class precision_checker { - public: - explicit FMT_CONSTEXPR precision_checker(ErrorHandler& eh) : handler_(eh) {} - +struct precision_checker { template ::value)> FMT_CONSTEXPR auto operator()(T value) -> unsigned long long { - if 
(is_negative(value)) handler_.on_error("negative precision"); + if (is_negative(value)) throw_format_error("negative precision"); return static_cast(value); } template ::value)> FMT_CONSTEXPR auto operator()(T) -> unsigned long long { - handler_.on_error("precision is not integer"); + throw_format_error("precision is not integer"); return 0; } - - private: - ErrorHandler& handler_; }; -template