diff --git a/cmake/CMakeLists.txt b/cmake/CMakeLists.txt index 11a6098ed7..1c14850a07 100644 --- a/cmake/CMakeLists.txt +++ b/cmake/CMakeLists.txt @@ -251,6 +251,7 @@ set(STANDARD_PACKAGES KSPACE LATBOLTZ LATTE + LEPTON MACHDYN MANIFOLD MANYBODY @@ -513,7 +514,7 @@ else() endif() foreach(PKG_WITH_INCL KSPACE PYTHON ML-IAP VORONOI COLVARS ML-HDNNP MDI MOLFILE NETCDF - PLUMED QMMM ML-QUIP SCAFACOS MACHDYN VTK KIM LATTE MSCG COMPRESS ML-PACE) + PLUMED QMMM ML-QUIP SCAFACOS MACHDYN VTK KIM LATTE MSCG COMPRESS ML-PACE LEPTON) if(PKG_${PKG_WITH_INCL}) include(Packages/${PKG_WITH_INCL}) endif() diff --git a/cmake/Modules/Packages/COLVARS.cmake b/cmake/Modules/Packages/COLVARS.cmake index da5b685536..3ca48b81fc 100644 --- a/cmake/Modules/Packages/COLVARS.cmake +++ b/cmake/Modules/Packages/COLVARS.cmake @@ -2,19 +2,14 @@ set(COLVARS_SOURCE_DIR ${LAMMPS_LIB_SOURCE_DIR}/colvars) file(GLOB COLVARS_SOURCES ${COLVARS_SOURCE_DIR}/[^.]*.cpp) -option(COLVARS_DEBUG "Debugging messages for Colvars (quite verbose)" OFF) +option(COLVARS_DEBUG "Enable debugging messages for Colvars (quite verbose)" OFF) -# Build Lepton by default -option(COLVARS_LEPTON "Build and link the Lepton library" ON) +option(COLVARS_LEPTON "Use the Lepton library for custom expressions" ON) if(COLVARS_LEPTON) - set(LEPTON_DIR ${LAMMPS_LIB_SOURCE_DIR}/colvars/lepton) - file(GLOB LEPTON_SOURCES ${LEPTON_DIR}/src/[^.]*.cpp) - add_library(lepton STATIC ${LEPTON_SOURCES}) - # Change the define below to LEPTON_BUILDING_SHARED_LIBRARY when linking Lepton as a DLL with MSVC - target_compile_definitions(lepton PRIVATE -DLEPTON_BUILDING_STATIC_LIBRARY) - set_target_properties(lepton PROPERTIES OUTPUT_NAME lammps_lepton${LAMMPS_MACHINE}) - target_include_directories(lepton PRIVATE ${LEPTON_DIR}/include) + if(NOT LEPTON_SOURCE_DIR) + include(Packages/LEPTON) + endif() endif() add_library(colvars STATIC ${COLVARS_SOURCES}) @@ -30,14 +25,11 @@ target_include_directories(colvars PRIVATE ${LAMMPS_SOURCE_DIR}) target_link_libraries(lammps PRIVATE colvars) if(COLVARS_DEBUG) - # Need to export the macro publicly to also affect the proxy + # Need to export the macro publicly to be valid in interface code target_compile_definitions(colvars PUBLIC -DCOLVARS_DEBUG) endif() if(COLVARS_LEPTON) - target_link_libraries(lammps PRIVATE lepton) target_compile_definitions(colvars PRIVATE -DLEPTON) - # Disable the line below when linking Lepton as a DLL with MSVC - target_compile_definitions(colvars PRIVATE -DLEPTON_USE_STATIC_LIBRARIES) - target_include_directories(colvars PUBLIC ${LEPTON_DIR}/include) + target_link_libraries(colvars PRIVATE lepton) endif() diff --git a/cmake/Modules/Packages/LEPTON.cmake b/cmake/Modules/Packages/LEPTON.cmake new file mode 100644 index 0000000000..df8a201c6b --- /dev/null +++ b/cmake/Modules/Packages/LEPTON.cmake @@ -0,0 +1,35 @@ +# avoid including this file twice +if(LEPTON_SOURCE_DIR) + return() +endif() +set(LEPTON_SOURCE_DIR ${LAMMPS_LIB_SOURCE_DIR}/lepton) + +file(GLOB LEPTON_SOURCES ${LEPTON_SOURCE_DIR}/src/[^.]*.cpp) + +if((CMAKE_HOST_SYSTEM_PROCESSOR STREQUAL "amd64") OR + (CMAKE_HOST_SYSTEM_PROCESSOR STREQUAL "AMD64") OR + (CMAKE_HOST_SYSTEM_PROCESSOR STREQUAL "x86_64")) + option(LEPTON_ENABLE_JIT "Enable Just-In-Time compiler for Lepton" ON) +else() + option(LEPTON_ENABLE_JIT "Enable Just-In-Time compiler for Lepton" OFF) +endif() + +if(LEPTON_ENABLE_JIT) + file(GLOB ASMJIT_SOURCES ${LEPTON_SOURCE_DIR}/asmjit/*/[^.]*.cpp) +endif() + +add_library(lepton STATIC ${LEPTON_SOURCES} ${ASMJIT_SOURCES}) 
+set_target_properties(lepton PROPERTIES OUTPUT_NAME lammps_lepton${LAMMPS_MACHINE}) +target_compile_definitions(lepton PUBLIC LEPTON_BUILDING_STATIC_LIBRARY=1) +target_include_directories(lepton PUBLIC ${LEPTON_SOURCE_DIR}/include) +if(CMAKE_SYSTEM_NAME STREQUAL "Linux") + find_library(LIB_RT rt QUIET) + target_link_libraries(lepton PUBLIC ${LIB_RT}) +endif() + +if(LEPTON_ENABLE_JIT) + target_compile_definitions(lepton PUBLIC "LEPTON_USE_JIT=1;ASMJIT_BUILD_X86=1;ASMJIT_STATIC=1;ASMJIT_BUILD_RELEASE=1") + target_include_directories(lepton PUBLIC ${LEPTON_SOURCE_DIR}) +endif() + +target_link_libraries(lammps PRIVATE lepton) diff --git a/cmake/presets/all_off.cmake b/cmake/presets/all_off.cmake index 9127305528..3d5ee95b3d 100644 --- a/cmake/presets/all_off.cmake +++ b/cmake/presets/all_off.cmake @@ -44,6 +44,7 @@ set(ALL_PACKAGES KSPACE LATBOLTZ LATTE + LEPTON MACHDYN MANIFOLD MANYBODY diff --git a/cmake/presets/all_on.cmake b/cmake/presets/all_on.cmake index 0a001bdc56..474051f6ec 100644 --- a/cmake/presets/all_on.cmake +++ b/cmake/presets/all_on.cmake @@ -46,6 +46,7 @@ set(ALL_PACKAGES KSPACE LATBOLTZ LATTE + LEPTON MACHDYN MANIFOLD MANYBODY diff --git a/cmake/presets/mingw-cross.cmake b/cmake/presets/mingw-cross.cmake index 2d74657394..6c6170acd3 100644 --- a/cmake/presets/mingw-cross.cmake +++ b/cmake/presets/mingw-cross.cmake @@ -36,6 +36,7 @@ set(WIN_PACKAGES INTERLAYER KSPACE LATTE + LEPTON MACHDYN MANIFOLD MANYBODY diff --git a/cmake/presets/most.cmake b/cmake/presets/most.cmake index 5dd9a2b78b..0d63140506 100644 --- a/cmake/presets/most.cmake +++ b/cmake/presets/most.cmake @@ -35,6 +35,7 @@ set(ALL_PACKAGES GRANULAR INTERLAYER KSPACE + LEPTON MACHDYN MANYBODY MC diff --git a/cmake/presets/nolib.cmake b/cmake/presets/nolib.cmake index b6567ad617..b022d4bb55 100644 --- a/cmake/presets/nolib.cmake +++ b/cmake/presets/nolib.cmake @@ -13,6 +13,7 @@ set(PACKAGES_WITH_LIB KOKKOS LATBOLTZ LATTE + LEPTON MACHDYN MDI MESONT diff --git a/cmake/presets/windows.cmake b/cmake/presets/windows.cmake index e93cd35daa..7075659964 100644 --- a/cmake/presets/windows.cmake +++ b/cmake/presets/windows.cmake @@ -31,6 +31,7 @@ set(WIN_PACKAGES GRANULAR INTERLAYER KSPACE + LEPTON MANIFOLD MANYBODY MC diff --git a/doc/src/Build_extras.rst b/doc/src/Build_extras.rst index d1161164e9..eab9355a83 100644 --- a/doc/src/Build_extras.rst +++ b/doc/src/Build_extras.rst @@ -35,7 +35,7 @@ This is the list of packages that may require additional steps. * :ref:`ADIOS ` * :ref:`ATC ` * :ref:`AWPMD ` - * :ref:`COLVARS ` + * :ref:`COLVARS ` * :ref:`COMPRESS ` * :ref:`ELECTRODE ` * :ref:`GPU ` @@ -44,6 +44,7 @@ This is the list of packages that may require additional steps. * :ref:`KIM ` * :ref:`KOKKOS ` * :ref:`LATTE ` + * :ref:`LEPTON ` * :ref:`MACHDYN ` * :ref:`MDI ` * :ref:`MESONT ` @@ -876,6 +877,55 @@ library. ---------- +.. _lepton: + +LEPTON package +-------------- + +To build with this package, you must build the Lepton library which is +included in the LAMMPS source distribution in the ``lib/lepton`` folder. + +.. tabs:: + + .. tab:: CMake build + + This is the recommended build procedure for using Lepton in + LAMMPS. No additional settings are normally needed besides + ``-D PKG_LEPTON=yes``. + + On x86 hardware the Lepton library will also include a just-in-time + compiler for faster execution. This is auto detected but can + be explicitly disabled by setting ``-D LEPTON_ENABLE_JIT=no`` + (or enabled by setting it to yes). + + .. 
tab:: Traditional make + + Before building LAMMPS, one must build the Lepton library in lib/lepton. + + This can be done manually in the same folder by using or adapting + one of the provided Makefiles: for example, ``Makefile.serial`` for + the GNU C++ compiler, or ``Makefile.mpi`` for the MPI compiler wrapper. + The Lepton library is written in C++11 and thus the C++ compiler + may need to be instructed to enable support for that. + + In general, it is safer to use build settings consistent with the + rest of LAMMPS. This is best carried out from the LAMMPS src + directory using one of the following commands, which simply invoke the + ``lib/lepton/Install.py`` script with the specified args: + + .. code-block:: bash + + $ make lib-lepton # print help message + $ make lib-lepton args="-m serial" # build with GNU g++ compiler (settings as with "make serial") + $ make lib-lepton args="-m mpi" # build with default MPI compiler (settings as with "make mpi") + + The "machine" argument of the "-m" flag is used to find a + Makefile.machine to use as build recipe. + + The build should produce a ``build`` folder and the library ``lib/lepton/liblmplepton.a``. + +---------- + .. _mliap: ML-IAP package @@ -1221,7 +1271,7 @@ The ATC package requires the MANYBODY package also be installed. .. _awpmd: AWPMD package ------------------- +------------- .. tabs:: @@ -1269,14 +1319,13 @@ AWPMD package ---------- -.. _colvars: +.. _colvar: COLVARS package ---------------------------------------- +--------------- -This package includes the `Colvars library -`_ into the LAMMPS distribution, which can -be built for the most part with all major versions of the C++ language. +This package enables the use of the `Colvars `_ +module included in the LAMMPS source distribution. .. tabs:: @@ -1289,17 +1338,13 @@ be built for the most part with all major versions of the C++ language. .. tab:: Traditional make - Before building LAMMPS, one must build the Colvars library in lib/colvars. + As with other libraries distributed with LAMMPS, the Colvars library + needs to be built before building the LAMMPS program with the COLVARS + package enabled. - This can be done manually in the same folder by using or adapting - one of the provided Makefiles: for example, ``Makefile.g++`` for - the GNU C++ compiler. C++11 compatibility may need to be enabled - for some older compilers (as is done in the example makefile). - - In general, it is safer to use build setting consistent with the - rest of LAMMPS. This is best carried out from the LAMMPS src - directory using a command like these, which simply invokes the - ``lib/colvars/Install.py`` script with the specified args: + From the LAMMPS ``src`` directory, this is most easily and safely done + via one of the following commands, which implicitly rely on the + ``lib/colvars/Install.py`` script with optional arguments: .. code-block:: bash @@ -1309,10 +1354,17 @@ be built for the most part with all major versions of the C++ language. make lib-colvars args="-m g++-debug" # build with GNU g++ compiler and colvars debugging enabled The "machine" argument of the "-m" flag is used to find a - Makefile.machine to use as build recipe. If it does not already - exist in ``lib/colvars``, it will be auto-generated by using - compiler flags consistent with those parsed from the core LAMMPS - makefiles. + ``Makefile.machine`` file to use as build recipe.
If such recipe does + not already exist in ``lib/colvars``, suitable settings will be + auto-generated consistent with those used in the core LAMMPS makefiles. + + + .. versionchanged:: TBD + + Please note that Colvars uses the Lepton library, which is now + included with the LEPTON package; if you use anything other than + the ``make lib-colvars`` command, please make sure to :ref:`build + Lepton beforehand `. Optional flags may be specified as environment variables: @@ -1321,10 +1373,10 @@ be built for the most part with all major versions of the C++ language. COLVARS_DEBUG=yes make lib-colvars args="-m machine" # Build with debug code (much slower) COLVARS_LEPTON=no make lib-colvars args="-m machine" # Build without Lepton (included otherwise) - The build should produce two files: the library ``lib/colvars/libcolvars.a`` - (which also includes Lepton objects if enabled) and the specification file - ``lib/colvars/Makefile.lammps``. The latter is auto-generated, and normally does - not need to be edited. + The build should produce two files: the library + ``lib/colvars/libcolvars.a`` and the specification file + ``lib/colvars/Makefile.lammps``. The latter is auto-generated, + and normally does not need to be edited. ---------- diff --git a/doc/src/Build_package.rst b/doc/src/Build_package.rst index 4ee779742e..938ffec306 100644 --- a/doc/src/Build_package.rst +++ b/doc/src/Build_package.rst @@ -37,7 +37,7 @@ packages: * :ref:`ADIOS ` * :ref:`ATC ` * :ref:`AWPMD ` - * :ref:`COLVARS ` + * :ref:`COLVARS ` * :ref:`COMPRESS ` * :ref:`ELECTRODE ` * :ref:`GPU ` @@ -46,6 +46,7 @@ packages: * :ref:`KIM ` * :ref:`KOKKOS ` * :ref:`LATTE ` + * :ref:`LEPTON ` * :ref:`MACHDYN ` * :ref:`MDI ` * :ref:`MESONT ` diff --git a/doc/src/Commands_bond.rst b/doc/src/Commands_bond.rst index ac2d5882fb..cfc896aa0e 100644 --- a/doc/src/Commands_bond.rst +++ b/doc/src/Commands_bond.rst @@ -44,6 +44,7 @@ OPT. * :doc:`harmonic (iko) ` * :doc:`harmonic/shift (o) ` * :doc:`harmonic/shift/cut (o) ` + * :doc:`lepton (o) ` * :doc:`mesocnt ` * :doc:`mm3 ` * :doc:`morse (o) ` @@ -93,6 +94,7 @@ OPT. * :doc:`fourier/simple (o) ` * :doc:`gaussian ` * :doc:`harmonic (iko) ` + * :doc:`lepton (o) ` * :doc:`mesocnt ` * :doc:`mm3 ` * :doc:`quartic (o) ` @@ -127,6 +129,7 @@ OPT. * :doc:`fourier (io) ` * :doc:`harmonic (iko) ` * :doc:`helix (o) ` + * :doc:`lepton (o) ` * :doc:`multi/harmonic (o) ` * :doc:`nharmonic (o) ` * :doc:`opls (iko) ` diff --git a/doc/src/Commands_pair.rst b/doc/src/Commands_pair.rst index 2fc90652c5..59501b4a56 100644 --- a/doc/src/Commands_pair.rst +++ b/doc/src/Commands_pair.rst @@ -134,6 +134,8 @@ OPT. * :doc:`lcbop ` * :doc:`lebedeva/z ` * :doc:`lennard/mdf ` + * :doc:`lepton (o) ` + * :doc:`lepton/coul (o) ` * :doc:`line/lj ` * :doc:`lj/charmm/coul/charmm (giko) ` * :doc:`lj/charmm/coul/charmm/implicit (ko) ` diff --git a/doc/src/Packages_details.rst b/doc/src/Packages_details.rst index 969765e3be..79d5d872aa 100644 --- a/doc/src/Packages_details.rst +++ b/doc/src/Packages_details.rst @@ -68,6 +68,7 @@ page gives those details. * :ref:`KSPACE ` * :ref:`LATBOLTZ ` * :ref:`LATTE ` + * :ref:`LEPTON ` * :ref:`MACHDYN ` * :ref:`MANIFOLD ` * :ref:`MANYBODY ` @@ -492,22 +493,21 @@ COLVARS package **Contents:** -COLVARS stands for collective variables, which can be used to -implement various enhanced sampling methods, including Adaptive -Biasing Force, Metadynamics, Steered MD, Umbrella Sampling and -Restraints. 
A :doc:`fix colvars ` command is implemented -which wraps a COLVARS library, which implements these methods. -simulations. +Colvars stands for collective variables, which can be used to implement +various enhanced sampling methods, including Adaptive Biasing Force, +Metadynamics, Steered MD, Umbrella Sampling and Restraints. A :doc:`fix +colvars ` command is implemented which wraps the Colvars +library that implements these methods. -**Authors:** The COLVARS library is written and maintained by -Giacomo Fiorin (ICMS, Temple University, Philadelphia, PA, USA) -and Jerome Henin (LISM, CNRS, Marseille, France), originally for -the NAMD MD code, but with portability in mind. Axel Kohlmeyer -(Temple U) provided the interface to LAMMPS. +**Authors:** The COLVARS library is written and maintained by Giacomo +Fiorin (NIH, Bethesda, MD, USA) and Jerome Henin (CNRS, Paris, France), +originally for the NAMD MD code, but with portability in mind. Axel +Kohlmeyer (Temple U) provided the interface to LAMMPS. **Install:** -This package has :ref:`specific installation instructions ` on the :doc:`Build extras ` page. +This package has :ref:`specific installation instructions ` on +the :doc:`Build extras ` page. **Supporting info:** @@ -516,6 +516,8 @@ This package has :ref:`specific installation instructions ` on the :doc * src/COLVARS/README * lib/colvars/README * :doc:`fix colvars ` +* :doc:`group2ndx ` +* :doc:`ndx2group ` * examples/PACKAGES/colvars ---------- @@ -1388,6 +1390,46 @@ the :doc:`Build extras ` page. ---------- +.. _PKG-LEPTON: + +LEPTON package +-------------- + +**Contents:** + +Styles for pair, bond, angle, and dihedral forces that evaluate the potential +function from a string using the `Lepton mathematical expression parser +`_. Lepton is a C++ library that is +bundled with `OpenMM `_ and can be used for +parsing, evaluating, differentiating, and analyzing mathematical +expressions. This is a more lightweight and efficient alternative to an +embedded Python interpreter, as used in the :ref:`PYTHON package `, +for evaluating custom potential functions. On the other hand, +since the potentials are evaluated from analytical expressions, they are +more precise than what can be done with :ref:`tabulated potentials +`. + +**Authors:** Axel Kohlmeyer (Temple U). Lepton itself is developed +by Peter Eastman at Stanford University. + +.. versionadded:: TBD + +**Install:** + +This package has :ref:`specific installation instructions ` on +the :doc:`Build extras ` page. + +**Supporting info:** + +* src/LEPTON: filenames -> commands +* lib/lepton/README.md +* :doc:`pair_style lepton ` +* :doc:`bond_style lepton ` +* :doc:`angle_style lepton ` +* :doc:`dihedral_style lepton ` + +---------- + .. _PKG-MACHDYN: MACHDYN package diff --git a/doc/src/Packages_list.rst b/doc/src/Packages_list.rst index fa887e7977..9e5727bb0e 100644 --- a/doc/src/Packages_list.rst +++ b/doc/src/Packages_list.rst @@ -238,6 +238,11 @@ whether an extra library is needed to build and use the package: - :doc:`fix latte ` - latte - ext + * - :ref:`LEPTON ` + - evaluate strings as potential functions + - :doc:`pair_style lepton ` + - PACKAGES/lepton + - int * - :ref:`MACHDYN ` - smoothed Mach dynamics - `SMD User Guide `_ diff --git a/doc/src/angle_lepton.rst b/doc/src/angle_lepton.rst new file mode 100644 index 0000000000..ea948c6a5f --- /dev/null +++ b/doc/src/angle_lepton.rst @@ -0,0 +1,94 @@ +.. index:: angle_style lepton +..
index:: angle_style lepton/omp + +angle_style lepton command +========================== + +Accelerator Variants: *lepton/omp* + +Syntax +"""""" + +.. code-block:: LAMMPS + + angle_style lepton + +Examples +"""""""" + +.. code-block:: LAMMPS + + angle_style lepton + + angle_coeff 1 120.0 "k*theta^2; k=250.0" + angle_coeff 2 90.0 "k2*theta^2 + k3*theta^3 + k4*theta^4; k2=300.0; k3=-100.0; k4=50.0" + angle_coeff 3 109.47 "k*theta^2; k=350.0" + +Description +""""""""""" + +.. versionadded:: TBD + +Angle style *lepton* computes angular interactions between three atoms +with a custom potential function. The potential function must be +provided as an expression string using "theta" as the angle variable +relative to the reference angle :math:`\theta_0` which is provided as an +angle coefficient. For example `"200.0*theta^2"` represents a +:doc:`harmonic angle ` potential with a force constant +*K* of 200.0 energy units: + +.. math:: + + U_{angle,i} = K (\theta_i - \theta_0)^2 = K \theta^2 \qquad \theta = \theta_i - \theta_0 + +The `Lepton library `_, that the +*lepton* angle style interfaces with, evaluates this expression string +at run time to compute the angle energy. It also creates an +analytical representation of the first derivative of this expression +with respect to "theta" and then uses that to compute the force between +the angle atoms as defined by the topology data. + +The following coefficients must be defined for each angle type via the +:doc:`angle_coeff ` command as in the example above, or in +the data file or restart files read by the :doc:`read_data ` +or :doc:`read_restart ` commands: + +* Lepton expression (energy units) +* :math:`\theta_0` (degrees) + +The Lepton expression must be either enclosed in quotes or must not +contain any whitespace so that LAMMPS recognizes it as a single keyword. +More on valid Lepton expressions below. The :math:`\theta_0` +coefficient is the "equilibrium angle". It is entered in degrees, but +internally converted to radians. Thus the expression must assume +"theta" is in radians. The potential energy function in the Lepton +expression is shifted in such a way that the potential energy is 0 for +an angle :math:`\theta_i == \theta_0`. + +---------- + +.. include:: lepton_expression.rst + +---------- + +.. include:: accel_styles.rst + +---------- + +Restrictions +"""""""""""" + +This angle style is part of the LEPTON package and only enabled if LAMMPS +was built with this package. See the :doc:`Build package +` page for more info.
+ +Related commands +"""""""""""""""" + +:doc:`angle_coeff `, :doc:`angle_style table `, +:doc:`bond_style lepton `,:doc:`dihedral_style lepton ` + +Default +""""""" + +none diff --git a/doc/src/angle_style.rst b/doc/src/angle_style.rst index 024481ce68..1f1ae72647 100644 --- a/doc/src/angle_style.rst +++ b/doc/src/angle_style.rst @@ -10,7 +10,7 @@ Syntax angle_style style -* style = *none* or *zero* or *hybrid* or *amoeba* or *charmm* or *class2* or *class2/p6* or *cosine* or *cosine/buck6d* or *cosine/delta* or *cosine/periodic* or *cosine/shift* or *cosine/shift/exp* or *cosine/squared* or *cross* or *dipole* or *fourier* or *fourier/simple* or *gaussian* or *harmonic* or *mm3* or *quartic* or *spica* or *table* +* style = *none* or *zero* or *hybrid* or *amoeba* or *charmm* or *class2* or *class2/p6* or *cosine* or *cosine/buck6d* or *cosine/delta* or *cosine/periodic* or *cosine/shift* or *cosine/shift/exp* or *cosine/squared* or *cross* or *dipole* or *fourier* or *fourier/simple* or *gaussian* or *harmonic* or *lepton* or *mm3* or *quartic* or *spica* or *table* Examples """""""" @@ -90,6 +90,7 @@ of (g,i,k,o,t) to indicate which accelerated styles exist. * :doc:`fourier/simple ` - angle with a single cosine term * :doc:`gaussian ` - multi-centered Gaussian-based angle potential * :doc:`harmonic ` - harmonic angle +* :doc:`lepton ` - angle potential from evaluating a string * :doc:`mesocnt ` - piecewise harmonic and linear angle for bending-buckling of nanotubes * :doc:`mm3 ` - anharmonic angle * :doc:`quartic ` - angle with cubic and quartic terms diff --git a/doc/src/bond_lepton.rst b/doc/src/bond_lepton.rst new file mode 100644 index 0000000000..91f040e183 --- /dev/null +++ b/doc/src/bond_lepton.rst @@ -0,0 +1,92 @@ +.. index:: bond_style lepton +.. index:: bond_style lepton/omp + +bond_style lepton command +========================= + +Accelerator Variants: *lepton/omp* + +Syntax +"""""" + +.. code-block:: LAMMPS + + bond_style lepton + +Examples +"""""""" + +.. code-block:: LAMMPS + + bond_style lepton + + bond_coeff 1 1.5 "k*r^2; k=250.0" + bond_coeff 2 1.1 "k2*r^2 + k3*r^3 + k4*r^4; k2=300.0; k3=-100.0; k4=50.0" + bond_coeff 3 1.3 "k*r^2; k=350.0" + +Description +""""""""""" + +.. versionadded:: TBD + +Bond style *lepton* computes bonded interactions between two atoms with +a custom function. The potential function must be provided as an +expression string using "r" as the distance variable relative to the +reference distance :math:`r_0` which is provided as a bond coefficient. +For example `"200.0*r^2"` represents a harmonic potential with a force +constant *K* of 200.0 energy units: + +.. math:: + + U_{bond,i} = K (r_i - r_0)^2 = K r^2 \qquad r = r_i - r_0 + +The `Lepton library `_, that the +*lepton* bond style interfaces with, evaluates this expression string at +run time to compute the pairwise energy. It also creates an analytical +representation of the first derivative of this expression with respect to +"r" and then uses that to compute the force between the atom pairs forming +bonds as defined by the topology data. + +The following coefficients must be defined for each bond type via the +:doc:`bond_coeff ` command as in the examples above, or in +the data file or restart files read by the :doc:`read_data ` +or :doc:`read_restart ` commands: + +* Lepton expression (energy units) +* :math:`r_0` (distance) + +The Lepton expression must be either enclosed in quotes or must not +contain any whitespace so that LAMMPS recognizes it as a single keyword. 
+More on valid Lepton expressions below. The :math:`r_0` is the +"equilibrium distance". The potential energy function in the Lepton +expression is shifted in such a way, that the potential energy is 0 for +a bond length :math:`r_i == r_0`. + +---------- + +.. include:: lepton_expression.rst + +---------- + +.. include:: accel_styles.rst + +---------- + +Restrictions +"""""""""""" + +This bond style is part of the LEPTON package and only enabled if LAMMPS +was built with this package. See the :doc:`Build package +` page for more info. + +Related commands +"""""""""""""""" + +:doc:`bond_coeff `, :doc:`bond_style table `, +:doc:`bond_write `, :doc:`angle_style lepton `, +:doc:`dihedral_style lepton ` + +Default +""""""" + +none diff --git a/doc/src/bond_style.rst b/doc/src/bond_style.rst index 9197e6c4eb..23b89d00a2 100644 --- a/doc/src/bond_style.rst +++ b/doc/src/bond_style.rst @@ -10,7 +10,7 @@ Syntax bond_style style args -* style = *none* or *zero* or *hybrid* or *bpm/rotational* or *bpm/spring* or *class2* or *fene* or *fene/expand* or *fene/nm* or *gaussian* or *gromos* or *harmonic* or *harmonic/shift* or *harmonic/shift/cut* or *morse* or *nonlinear* or *oxdna/fene* or *oxdena2/fene* or *oxrna2/fene* or *quartic* or *special* or *table* +* style = *none* or *zero* or *hybrid* or *bpm/rotational* or *bpm/spring* or *class2* or *fene* or *fene/expand* or *fene/nm* or *gaussian* or *gromos* or *harmonic* or *harmonic/shift* or *harmonic/shift/cut* or *lepton* or *morse* or *nonlinear* or *oxdna/fene* or *oxdena2/fene* or *oxrna2/fene* or *quartic* or *special* or *table* * args = none for any style except *hybrid* @@ -95,6 +95,7 @@ accelerated styles exist. * :doc:`harmonic ` - harmonic bond * :doc:`harmonic/shift ` - shifted harmonic bond * :doc:`harmonic/shift/cut ` - shifted harmonic bond with a cutoff +* :doc:`lepton ` - bond potential from evaluating a string * :doc:`mesocnt ` - Harmonic bond wrapper with parameterization presets for nanotubes * :doc:`mm3 ` - MM3 anharmonic bond * :doc:`morse ` - Morse bond diff --git a/doc/src/dihedral_lepton.rst b/doc/src/dihedral_lepton.rst new file mode 100644 index 0000000000..e030c3b7c4 --- /dev/null +++ b/doc/src/dihedral_lepton.rst @@ -0,0 +1,89 @@ +.. index:: dihedral_style lepton +.. index:: dihedral_style lepton/omp + +dihedral_style lepton command +============================= + +Accelerator Variants: *lepton/omp* + +Syntax +"""""" + +.. code-block:: LAMMPS + + dihedral_style lepton + +Examples +"""""""" + +.. code-block:: LAMMPS + + dihedral_style lepton + + dihedral_coeff 1 "k*(1 + d*cos(n*phi)); k=75.0; d=1; n=2" + dihedral_coeff 2 "45*(1-cos(4*phi))" + dihedral_coeff 2 "k2*cos(phi) + k3*cos(phi)^2; k2=100.0" + dihedral_coeff 3 "k*(phi-phi0)^2; k=85.0; phi0=120.0" + +Description +""""""""""" + +.. versionadded:: TBD + +Dihedral style *lepton* computes dihedral interactions between four +atoms forming a dihedral angle with a custom potential function. The +potential function must be provided as an expression string using "phi" +as the dihedral angle variable. For example `"200.0*(phi-120.0)^2"` +represents a :doc:`quadratic dihedral ` potential +around a 120 degree dihedral angle with a force constant *K* of 200.0 +energy units: + +.. math:: + + U_{dihedral,i} = K (\phi_i - \phi_0)^2 + +The `Lepton library `_, that the +*lepton* dihedral style interfaces with, evaluates this expression +string at run time to compute the pairwise energy. 
It also creates an +analytical representation of the first derivative of this expression +with respect to "phi" and then uses that to compute the force between +the dihedral atoms as defined by the topology data. + +The potential function expression for each dihedral type is provided via the +:doc:`dihedral_coeff ` command as in the example above, or in +the data file or restart files read by the :doc:`read_data ` +or :doc:`read_restart ` commands. The expression is in energy units. + +The Lepton expression must be either enclosed in quotes or must not +contain any whitespace so that LAMMPS recognizes it as a single keyword. +More on valid Lepton expressions below. Dihedral angles are internally +computed in radians and thus the expression must assume "phi" is in +radians. + +---------- + +.. include:: lepton_expression.rst + +---------- + +.. include:: accel_styles.rst + +---------- + +Restrictions +"""""""""""" + +This dihedral style is part of the LEPTON package and only enabled if LAMMPS +was built with this package. See the :doc:`Build package +` page for more info. + +Related commands +"""""""""""""""" + +:doc:`dihedral_coeff `, :doc:`dihedral_style table `, +:doc:`bond_style lepton `, :doc:`angle_style lepton ` + +Default +""""""" + +none diff --git a/doc/src/dihedral_style.rst b/doc/src/dihedral_style.rst index 4e56d1f787..45dd66e750 100644 --- a/doc/src/dihedral_style.rst +++ b/doc/src/dihedral_style.rst @@ -10,7 +10,7 @@ Syntax dihedral_style style -* style = *none* or *zero* or *hybrid* or *charmm* or *charmmfsw* or *class2* or *osine/shift/exp* or *fourier* or *harmonic* or *helix* or *multi/harmonic* or *nharmonic* or *opls* or *spherical* or *table* or *table/cut* +* style = *none* or *zero* or *hybrid* or *charmm* or *charmmfsw* or *class2* or *cosine/shift/exp* or *fourier* or *harmonic* or *helix* or *lepton* or *multi/harmonic* or *nharmonic* or *opls* or *spherical* or *table* or *table/cut* Examples """""""" @@ -108,6 +108,7 @@ exist. * :doc:`fourier ` - dihedral with multiple cosine terms * :doc:`harmonic ` - harmonic dihedral * :doc:`helix ` - helix dihedral +* :doc:`lepton ` - dihedral potential from evaluating a string * :doc:`multi/harmonic ` - dihedral with 5 harmonic terms * :doc:`nharmonic ` - same as multi-harmonic with N terms * :doc:`opls ` - OPLS dihedral diff --git a/doc/src/fix_colvars.rst b/doc/src/fix_colvars.rst index ec7b33ce51..77a90cc54f 100644 --- a/doc/src/fix_colvars.rst +++ b/doc/src/fix_colvars.rst @@ -37,57 +37,64 @@ Description This fix interfaces LAMMPS to the collective variables (Colvars) library, which allows to calculate potentials of mean force (PMFs) for -any set of colvars, using different sampling methods: currently -implemented are the Adaptive Biasing Force (ABF) method, metadynamics, -Steered Molecular Dynamics (SMD) and Umbrella Sampling (US) via a -flexible harmonic restraint bias. +any set of colvars, using sampling methods, including but not limited to +Adaptive Biasing Force (ABF), metadynamics (MtD), Steered Molecular +Dynamics (SMD) and Umbrella Sampling (US) via a flexible harmonic +restraint bias. -This documentation describes only the fix colvars command itself and -LAMMPS specific parts of the code. The full documentation of the -colvars library is available as `this supplementary PDF document `_ +This documentation describes only the ``fix colvars`` command itself in +a LAMMPS script. 
The Colvars library is documented via the included +`PDF manual `_ or at the webpage +`https://colvars.github.io/colvars-refman-lammps/colvars-refman-lammps.html +`_. -The Colvars library is developed at `https://github.com/colvars/colvars `_ -A detailed discussion of its implementation is in :ref:`(Fiorin) `. +The Colvars library is developed at `https://github.com/Colvars/colvars +`_. A detailed discussion of its +implementation is in :ref:`(Fiorin) `; additional references are +printed at runtime based on specific features being used. There are some example scripts for using this package with LAMMPS in the -examples/PACKAGES/colvars directory. +``examples/PACKAGES/colvars`` directory. ---------- -The only mandatory argument to the fix is the filename to the colvars -input file that contains the input that is independent from the MD -program in which the colvars library has been integrated. +The only required argument to ``fix colvars`` is the filename to the +Colvars configuration file that contains the definition of the variables +and any biasing methods applied to them. -The *group-ID* entry is ignored. The collective variable module will -always apply to the entire system and there can only be one instance -of the colvars fix at a time. The colvars fix will only communicate -the minimum information necessary and the colvars library supports -multiple, completely independent collective variables, so there is -no restriction to functionality by limiting the number of colvars fixes. +The *group-ID* entry is ignored. ``fix colvars`` will always apply to +the entire system, but specific atoms will be selected based on +selection keywords in the Colvars configuration file or files. There is +no need to define multiple ``fix colvars`` instances and it is not +allowed. -The *input* keyword allows to specify a state file that would contain -the restart information required in order to continue a calculation from -a prerecorded state. Fix colvars records it state in :doc:`binary restart ` -files, so when using the :doc:`read_restart ` command, -this is usually not needed. +The *output* keyword allows to specify the prefix of output files +generated by Colvars, for example ``output.colvars.traj`` or +``output.pmf``. -The *output* keyword allows to specify the output prefix. All output -files generated will use this prefix followed by the ".colvars." and -a word like "state" or "traj". +The *input* keyword allows to specify an optional state file that +contains the restart information needed to continue a previous +simulation. Note, however, that ``fix colvars`` records its state +in :doc:`binary restart ` files, so when using the +:doc:`read_restart ` command, this is usually not needed. The *seed* keyword contains the seed for the random number generator -that will be used in the colvars module. +used by Colvars. The *unwrap* keyword controls whether wrapped or unwrapped coordinates -are passed to the colvars library for calculation of the collective +are passed to the Colvars library for calculation of the collective variables and the resulting forces. The default is *yes*, i.e. to use -the image flags to reconstruct the absolute atom positions. -Setting this to *no* will use the current local coordinates that are -wrapped back into the simulation cell at each re-neighboring instead. +the image flags to reconstruct the absolute atom positions.
Setting +this to *no* will use the current local coordinates that are wrapped +back into the simulation cell at each re-neighboring instead. For +information about when and how this affects results, please see +`https://colvars.github.io/colvars-refman-lammps/colvars-refman-lammps.html#sec:colvar_atom_groups_wrapping +`_. The *tstat* keyword can be either NULL or the label of a thermostatting -fix that thermostats all atoms in the fix colvars group. This will be -used to provide the colvars module with the current thermostat target +fix that thermostats all atoms in the fix colvars group. This will be +used to let Colvars know what is the current thermostat target temperature. Restart, fix_modify, output, run start/stop, minimize info @@ -95,41 +102,42 @@ Restart, fix_modify, output, run start/stop, minimize info This fix writes the current status of the colvars module into :doc:`binary restart files `. This is in addition to the text -mode status file that is written by the colvars module itself and the -kind of information in both files is identical. +mode ``.colvars.state`` written by Colvars itself and the information in +both files is identical. -The :doc:`fix_modify ` *energy* option is supported by -this fix to add the energy change from the biasing force added by -Colvars to the global potential energy of the system as part of -:doc:`thermodynamic output `. The default setting for -this fix is :doc:`fix_modify energy no `. +The :doc:`fix_modify ` *energy* option is supported by this +fix to add the energy change from the biasing force added by Colvars to +the global potential energy of the system as part of :doc:`thermodynamic +output `. The default setting for this fix is +:doc:`fix_modify energy no `. -The *fix_modify configfile * option allows to add settings -from an additional config file to the colvars module. This option can -only be used, after the system has been initialized with a :doc:`run ` -command. +The *fix_modify configfile * option loads Colvars +configuration from an additional file. This option can only be used, +after the system has been initialized with a :doc:`run ` command. The *fix_modify config * option allows to add settings -from inline strings. Those have to fit on a single line when enclosed -in a pair of double quotes ("), or can span multiple lines when bracketed -by a pair of triple double quotes (""", like python embedded documentation). +from inline strings. Those have to fit on a single line when enclosed in +a pair of double quotes ("), or can span multiple lines when bracketed +by a pair of triple double quotes (""", like Python embedded +documentation). This fix computes a global scalar which can be accessed by various -:doc:`output commands `. The scalar is the Colvars -energy mentioned above. The scalar value calculated by this fix is +:doc:`output commands `. The scalar is the Colvars energy +mentioned above. The scalar value calculated by this fix is "extensive". Restrictions """""""""""" -This fix is part of the COLVARS package. It is only enabled if -LAMMPS was built with that package. See the :doc:`Build package -` page for more info. +``fix colvars`` is provided by the COLVARS package and is only available +if LAMMPS was built with that package. Some of the features also +require code available from the LEPTON package. See the :doc:`Build +package ` page for more info. -There can only be one colvars fix active at a time. 
Since the interface -communicates only the minimum amount of information and colvars module -itself can handle an arbitrary number of collective variables, this is -not a limitation of functionality. +There can only be one Colvars instance defined at a time. Since the +interface communicates only the minimum amount of information and the +Colvars module itself can handle an arbitrary number of collective +variables, this is not a limitation of functionality. Related commands """""""""""""""" diff --git a/doc/src/lepton_expression.rst b/doc/src/lepton_expression.rst new file mode 100644 index 0000000000..9ecebc921e --- /dev/null +++ b/doc/src/lepton_expression.rst @@ -0,0 +1,122 @@ + +Lepton expression syntax and features +""""""""""""""""""""""""""""""""""""" + +Lepton supports the following operators in expressions: + +.. table_from_list:: + :columns: 14 + + * \+ + * Add + * + * \- + * Subtract + * + * \* + * Multiply + * + * \/ + * Divide + * + * \^ + * Power + +The following mathematical functions are available: + +.. table_from_list:: + :columns: 4 + + * sqrt(x) + * Square root + * exp(x) + * Exponential + * log(x) + * Natural logarithm + * sin(x) + * Sine (angle in radians) + * cos(x) + * Cosine (angle in radians) + * sec(x) + * Secant (angle in radians) + * csc(x) + * Cosecant (angle in radians) + * tan(x) + * Tangent (angle in radians) + * cot(x) + * Cotangent (angle in radians) + * asin(x) + * Inverse sine (in radians) + * acos(x) + * Inverse cosine (in radians) + * atan(x) + * Inverse tangent (in radians) + * sinh(x) + * Hyperbolic sine + * cosh(x) + * Hyperbolic cosine + * tanh(x) + * Hyperbolic tangent + * erf(x) + * Error function + * erfc(x) + * Complementary Error function + * abs(x) + * Absolute value + * min(x,y) + * Minimum of two values + * max(x,y) + * Maximum of two values + * delta(x) + * delta(x) is 1 for `x = 0`, otherwise 0 + * step(x) + * step(x) is 0 for `x < 0`, otherwise 1 + +Numbers may be given in either decimal or exponential form. All of the +following are valid numbers: `5`, `-3.1`, `1e6`, and `3.12e-2`. + +As an extension to the standard Lepton syntax, it is also possible to +use LAMMPS :doc:`variables ` in the format "v_name". Before +evaluating the expression, "v_name" will be replaced with the value of +the variable "name". This is compatible with all kinds of scalar +variables, but not with vectors, arrays, local, or per-atom +variables. If necessary, a custom scalar variable needs to be defined +that can access the desired (single) item from a non-scalar variable. +As an example, the following lines will instruct LAMMPS to ramp +the force constant for a harmonic bond from 100.0 to 200.0 during the +next run: + +.. code-block:: LAMMPS + + variable fconst equal ramp(100.0, 200) + bond_style lepton + bond_coeff 1 1.5 "v_fconst * (r^2)" + +An expression may be followed by definitions for intermediate values that appear in the +expression. A semicolon ";" is used as a delimiter between value definitions. For example, +the expression: + +.. code-block:: C + + a^2+a*b+b^2; a=a1+a2; b=b1+b2 + +is exactly equivalent to + +.. code-block:: C + + (a1+a2)^2+(a1+a2)*(b1+b2)+(b1+b2)^2 + +The definition of an intermediate value may itself involve other +intermediate values. Whitespace and quotation characters ('\'' and '"') +are ignored. All uses of a value must appear *before* that value's +definition. For efficiency reasons, the expression string is parsed, +optimized, and then stored in an internal, pre-parsed representation for +evaluation. 
+ +Evaluating a Lepton expression is typically between 2.5 and 5 times +slower than the corresponding compiled and optimized C++ code. If +additional speed or GPU acceleration (via GPU or KOKKOS) is required, +the interaction can be represented as a table. Suitable table files +can be created either internally using the :doc:`pair_write ` +or :doc:`bond_write ` command or through the Python scripts +in the :ref:`tools/tabulate ` folder. diff --git a/doc/src/pair_lepton.rst b/doc/src/pair_lepton.rst new file mode 100644 index 0000000000..303bc13bb9 --- /dev/null +++ b/doc/src/pair_lepton.rst @@ -0,0 +1,159 @@ +.. index:: pair_style lepton +.. index:: pair_style lepton/omp +.. index:: pair_style lepton/coul +.. index:: pair_style lepton/coul/omp + +pair_style lepton command +========================= + +Accelerator Variants: *lepton/omp*, *lepton/coul/omp* + +Syntax +"""""" + +.. code-block:: LAMMPS + + pair_style style args + +* style = *lepton* or *lepton/coul* +* args = list of arguments for a particular style + +.. parsed-literal:: + + *lepton* args = cutoff + cutoff = global cutoff for the interactions (distance units) + *lepton/coul* args = cutoff keyword + cutoff = global cutoff for the interactions (distance units) + zero or more keywords may be appended + keyword = *ewald* or *pppm* or *msm* or *dispersion* or *tip4p* + +Examples +"""""""" + +.. code-block:: LAMMPS + + pair_style lepton 2.5 + + pair_coeff * * "k*((r-r0)^2*step(r0-r)); k=200; r0=1.5" 2.0 + pair_coeff 1 2 "4.0*eps*((sig/r)^12 - (sig/r)^6);eps=1.0;sig=1.0" 1.12246204830937 + pair_coeff 2 2 "eps*(2.0*(sig/r)^9 - 3.0*(sig/r)^6);eps=1.0;sig=1.0" + + pair_style lepton/coul 2.5 + pair_coeff 1 1 "qi*qj/r" 4.0 + pair_coeff 1 2 "lj+coul; lj=4.0*eps*((sig/r)^12 - (sig/r)^6); eps=1.0; sig=1.0; coul=qi*qj/r" + + pair_style lepton/coul 2.5 pppm + kspace_style pppm 1.0e-4 + pair_coeff 1 1 "qi*qj/r*erfc(alpha*r); alpha=1.067" + +Description +""""""""""" + +.. versionadded:: TBD + +Pair styles *lepton* and *lepton/coul* compute pairwise interactions +between particles which depend solely on the distance and have a cutoff. +The potential function must be provided as an expression string using +"r" as the distance variable. With pair style *lepton/coul* one may +additionally reference the charges of the two atoms of the pair with +"qi" and "qj", respectively. Note that further constants in the +expression can be defined in the same string as additional expressions +separated by semi-colons as shown in the examples above. + +The expression `"200.0*(r-1.5)^2"` represents a harmonic potential +around the pairwise distance :math:`r_0` of 1.5 distance units and a +force constant *K* of 200.0 energy units: + +.. math:: + + U_{ij} = K (r-r_0)^2 + +The expression `"qi*qj/r"` represents a regular Coulombic potential with cutoff: + +.. math:: + + U_{ij} = \frac{C q_i q_j}{\epsilon r} \qquad r < r_c + +The `Lepton library `_, that the +*lepton* pair style interfaces with, evaluates this expression string at +run time to compute the pairwise energy. It also creates an analytical +representation of the first derivative of this expression with respect +to "r" and then uses that to compute the force between the pairs of +particles within the given cutoff.
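+ +As a quick check of what Lepton computes from a given expression, the resulting energies and forces can be tabulated with the ``pair_write`` command once the simulation box and atoms have been defined; the sketch below reuses the Lennard-Jones expression from the examples above, and the table file name, section label, and distance range are arbitrary: + +.. code-block:: LAMMPS + + pair_style lepton 2.5 + pair_coeff 1 1 "4.0*eps*((sig/r)^12 - (sig/r)^6); eps=1.0; sig=1.0" + pair_write 1 1 1000 r 0.5 2.5 lepton_11.table LJ_1_1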
+ +The following coefficients must be defined for each pair of atom types +via the :doc:`pair_coeff ` command as in the examples above, +or in the data file or restart files read by the :doc:`read_data +` or :doc:`read_restart ` commands: + +* Lepton expression (energy units) +* cutoff (distance units) + +The Lepton expression must be either enclosed in quotes or must not +contain any whitespace so that LAMMPS recognizes it as a single keyword. +More on valid Lepton expressions below. The last coefficient is +optional; it allows to set the cutoff for a pair of atom types to a +different value than the global cutoff. + +For pair style *lepton* only the "lj" value of the :doc:`special_bonds ` +settings applies in case the interacting pair is also connected with a bond. +The potential energy will *only* be added to the "evdwl" property. + +For pair style *lepton/coul* only the "coul" value of the :doc:`special_bonds ` +settings applies in case the interacting pair is also connected with a bond. +The potential energy will *only* be added to the "ecoul" property. + +---------- + +.. include:: lepton_expression.rst + +---------- + +.. include:: accel_styles.rst + +---------- + +Mixing, shift, table, tail correction, restart, rRESPA info +""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" + +Pair styles *lepton* and *lepton/coul* do not support mixing. Thus, +expressions for *all* I,J pairs must be specified explicitly. + +Only pair style *lepton* supports the :doc:`pair_modify shift ` +option for shifting the energy of the pair interaction so that it is +0 at the cutoff; pair style *lepton/coul* does *not*. + +The :doc:`pair_modify table ` options are not relevant for +these pair styles. + +These pair styles do not support the :doc:`pair_modify tail +` option for adding long-range tail corrections to energy +and pressure. + +These pair styles write their information to :doc:`binary restart files +`, so pair_style and pair_coeff commands do not need to be +specified in an input script that reads a restart file. + +These pair styles can only be used via the *pair* keyword of the +:doc:`run_style respa ` command. They do not support the +*inner*, *middle*, *outer* keywords. + +---------- + +Restrictions +"""""""""""" + +These pair styles are part of the LEPTON package and only enabled if +LAMMPS was built with this package. See the :doc:`Build package +` page for more info. + +Related commands +"""""""""""""""" + +:doc:`pair_coeff `, :doc:`pair_style python `, +:doc:`pair_style table `, :doc:`pair_write ` + +Default +""""""" + +none diff --git a/doc/src/pair_style.rst b/doc/src/pair_style.rst index 48daf34f17..3f91bfc0b4 100644 --- a/doc/src/pair_style.rst +++ b/doc/src/pair_style.rst @@ -212,6 +212,8 @@ accelerated styles exist.
* :doc:`lcbop ` - long-range bond-order potential (LCBOP) * :doc:`lebedeva/z ` - Lebedeva interlayer potential for graphene with normals along z-axis * :doc:`lennard/mdf ` - LJ potential in A/B form with a taper function +* :doc:`lepton ` - pair potential from evaluating a string +* :doc:`lepton/coul ` - pair potential from evaluating a string with support for charges * :doc:`line/lj ` - LJ potential between line segments * :doc:`list ` - potential between pairs of atoms explicitly listed in an input file * :doc:`lj/charmm/coul/charmm ` - CHARMM potential with cutoff Coulomb diff --git a/doc/utils/sphinx-config/false_positives.txt b/doc/utils/sphinx-config/false_positives.txt index c1eb202525..1724cb4982 100644 --- a/doc/utils/sphinx-config/false_positives.txt +++ b/doc/utils/sphinx-config/false_positives.txt @@ -554,6 +554,7 @@ corotate corotation corotational correlator +Cosecant cosineshifted cossq costheta @@ -586,6 +587,7 @@ Crozier Cryst Crystallogr Csanyi +csc csg csh cshrc @@ -2236,7 +2238,7 @@ msm msmflag msse msst -Mtchell +MtD Mth mtk Mtotal @@ -3277,6 +3279,7 @@ Simul simulations Sinkovits Sinnott +sinh sinusoid sinusoidally SiO diff --git a/lib/README b/lib/README index ab71e6763c..255077bb1b 100644 --- a/lib/README +++ b/lib/README @@ -33,6 +33,8 @@ kim hooks to the KIM library, used by KIM package from Ryan Elliott and Ellad Tadmor (U Minn) kokkos Kokkos package for GPU and many-core acceleration from Kokkos development team (Sandia) +lepton Lepton library for fast evaluation of mathematical + expressions from a string. Imported from OpenMM. linalg set of BLAS and LAPACK routines needed by ATC package from Axel Kohlmeyer (Temple U) mdi hooks to the MDI library, used by MDI package diff --git a/lib/colvars/Makefile.common b/lib/colvars/Makefile.common index 31a93652ae..356a7f4a91 100644 --- a/lib/colvars/Makefile.common +++ b/lib/colvars/Makefile.common @@ -61,29 +61,25 @@ COLVARS_SRCS = \ colvarvalue.cpp \ colvar_neuralnetworkcompute.cpp -LEPTON_SRCS = \ - lepton/src/CompiledExpression.cpp \ - lepton/src/CompiledVectorExpression.cpp \ - lepton/src/ExpressionProgram.cpp \ - lepton/src/ExpressionTreeNode.cpp \ - lepton/src/Operation.cpp \ - lepton/src/ParsedExpression.cpp \ - lepton/src/Parser.cpp - # Allow to selectively turn off Lepton ifeq ($(COLVARS_LEPTON),no) -LEPTON_INCFLAGS = -COLVARS_OBJS = $(COLVARS_SRCS:.cpp=.o) + +LEPTON_INCFLAGS = + else -LEPTON_INCFLAGS = -Ilepton/include -DLEPTON -COLVARS_OBJS = $(COLVARS_SRCS:.cpp=.o) $(LEPTON_SRCS:.cpp=.o) + +LEPTON_DIR = ../lepton +include $(LEPTON_DIR)/Settings.mk +LEPTON_INCFLAGS = $(LEPTON_INC) $(LEPTON_DEF) + endif +COLVARS_OBJS = $(COLVARS_SRCS:.cpp=.o) %.o: %.cpp $(CXX) $(CXXFLAGS) $(COLVARS_INCFLAGS) $(LEPTON_INCFLAGS) -c -o $@ $< -$(COLVARS_LIB): Makefile.deps $(COLVARS_OBJS) +$(COLVARS_LIB): Makefile.deps $(COLVARS_OBJS) $(AR) $(ARFLAGS) $(COLVARS_LIB) $(COLVARS_OBJS) @@ -97,12 +93,3 @@ Makefile.deps: $(COLVARS_SRCS) include Makefile.deps -Makefile.lepton.deps: $(LEPTON_SRCS) - @echo > $@ - @for src in $^ ; do \ - obj=`basename $$src .cpp`.o ; \ - $(CXX) $(CXXFLAGS) -MM $(LEPTON_INCFLAGS) \ - -MT '$$(COLVARS_OBJ_DIR)'$$obj $$src >> $@ ; \ - done - -include Makefile.lepton.deps diff --git a/lib/colvars/Makefile.deps b/lib/colvars/Makefile.deps index d26df41995..6619653af0 100644 --- a/lib/colvars/Makefile.deps +++ b/lib/colvars/Makefile.deps @@ -5,327 +5,367 @@ $(COLVARS_OBJ_DIR)colvaratoms.o: colvaratoms.cpp colvarmodule.h \ colvaratoms.h colvardeps.h $(COLVARS_OBJ_DIR)colvarbias_abf.o: colvarbias_abf.cpp 
colvarmodule.h \ colvars_version.h colvar.h colvarvalue.h colvartypes.h colvarparse.h \ - colvarparams.h colvardeps.h lepton/include/Lepton.h \ - lepton/include/lepton/CompiledExpression.h \ - lepton/include/lepton/ExpressionTreeNode.h \ - lepton/include/lepton/windowsIncludes.h \ - lepton/include/lepton/CustomFunction.h \ - lepton/include/lepton/ExpressionProgram.h \ - lepton/include/lepton/ExpressionTreeNode.h \ - lepton/include/lepton/Operation.h lepton/include/lepton/CustomFunction.h \ - lepton/include/lepton/Exception.h \ - lepton/include/lepton/ParsedExpression.h lepton/include/lepton/Parser.h \ - colvarbias_abf.h colvarproxy.h colvarproxy_tcl.h colvarproxy_volmaps.h \ - colvarbias.h colvargrid.h colvar_UIestimator.h + colvarparams.h colvardeps.h ../lepton/include/Lepton.h \ + ../lepton/include/lepton/CompiledExpression.h \ + ../lepton/include/lepton/ExpressionTreeNode.h \ + ../lepton/include/lepton/windowsIncludes.h \ + ../lepton/include/lepton/CustomFunction.h \ + ../lepton/include/lepton/ExpressionProgram.h \ + ../lepton/include/lepton/ExpressionTreeNode.h \ + ../lepton/include/lepton/Operation.h \ + ../lepton/include/lepton/CustomFunction.h \ + ../lepton/include/lepton/Exception.h \ + ../lepton/include/lepton/ParsedExpression.h \ + ../lepton/include/lepton/Parser.h colvarbias_abf.h colvarproxy.h \ + colvarproxy_tcl.h colvarproxy_volmaps.h colvarbias.h colvargrid.h \ + colvar_UIestimator.h $(COLVARS_OBJ_DIR)colvarbias_alb.o: colvarbias_alb.cpp colvarmodule.h \ colvars_version.h colvarbias.h colvar.h colvarvalue.h colvartypes.h \ - colvarparse.h colvarparams.h colvardeps.h lepton/include/Lepton.h \ - lepton/include/lepton/CompiledExpression.h \ - lepton/include/lepton/ExpressionTreeNode.h \ - lepton/include/lepton/windowsIncludes.h \ - lepton/include/lepton/CustomFunction.h \ - lepton/include/lepton/ExpressionProgram.h \ - lepton/include/lepton/ExpressionTreeNode.h \ - lepton/include/lepton/Operation.h lepton/include/lepton/CustomFunction.h \ - lepton/include/lepton/Exception.h \ - lepton/include/lepton/ParsedExpression.h lepton/include/lepton/Parser.h \ - colvarbias_alb.h + colvarparse.h colvarparams.h colvardeps.h ../lepton/include/Lepton.h \ + ../lepton/include/lepton/CompiledExpression.h \ + ../lepton/include/lepton/ExpressionTreeNode.h \ + ../lepton/include/lepton/windowsIncludes.h \ + ../lepton/include/lepton/CustomFunction.h \ + ../lepton/include/lepton/ExpressionProgram.h \ + ../lepton/include/lepton/ExpressionTreeNode.h \ + ../lepton/include/lepton/Operation.h \ + ../lepton/include/lepton/CustomFunction.h \ + ../lepton/include/lepton/Exception.h \ + ../lepton/include/lepton/ParsedExpression.h \ + ../lepton/include/lepton/Parser.h colvarbias_alb.h $(COLVARS_OBJ_DIR)colvarbias.o: colvarbias.cpp colvarmodule.h \ colvars_version.h colvarproxy.h colvartypes.h colvarvalue.h \ colvarproxy_tcl.h colvarproxy_volmaps.h colvarbias.h colvar.h \ - colvarparse.h colvarparams.h colvardeps.h lepton/include/Lepton.h \ - lepton/include/lepton/CompiledExpression.h \ - lepton/include/lepton/ExpressionTreeNode.h \ - lepton/include/lepton/windowsIncludes.h \ - lepton/include/lepton/CustomFunction.h \ - lepton/include/lepton/ExpressionProgram.h \ - lepton/include/lepton/ExpressionTreeNode.h \ - lepton/include/lepton/Operation.h lepton/include/lepton/CustomFunction.h \ - lepton/include/lepton/Exception.h \ - lepton/include/lepton/ParsedExpression.h lepton/include/lepton/Parser.h \ - colvargrid.h + colvarparse.h colvarparams.h colvardeps.h ../lepton/include/Lepton.h \ + 
../lepton/include/lepton/CompiledExpression.h \ + ../lepton/include/lepton/ExpressionTreeNode.h \ + ../lepton/include/lepton/windowsIncludes.h \ + ../lepton/include/lepton/CustomFunction.h \ + ../lepton/include/lepton/ExpressionProgram.h \ + ../lepton/include/lepton/ExpressionTreeNode.h \ + ../lepton/include/lepton/Operation.h \ + ../lepton/include/lepton/CustomFunction.h \ + ../lepton/include/lepton/Exception.h \ + ../lepton/include/lepton/ParsedExpression.h \ + ../lepton/include/lepton/Parser.h colvargrid.h $(COLVARS_OBJ_DIR)colvarbias_histogram.o: colvarbias_histogram.cpp \ colvarmodule.h colvars_version.h colvarproxy.h colvartypes.h \ colvarvalue.h colvarproxy_tcl.h colvarproxy_volmaps.h colvar.h \ - colvarparse.h colvarparams.h colvardeps.h lepton/include/Lepton.h \ - lepton/include/lepton/CompiledExpression.h \ - lepton/include/lepton/ExpressionTreeNode.h \ - lepton/include/lepton/windowsIncludes.h \ - lepton/include/lepton/CustomFunction.h \ - lepton/include/lepton/ExpressionProgram.h \ - lepton/include/lepton/ExpressionTreeNode.h \ - lepton/include/lepton/Operation.h lepton/include/lepton/CustomFunction.h \ - lepton/include/lepton/Exception.h \ - lepton/include/lepton/ParsedExpression.h lepton/include/lepton/Parser.h \ - colvarbias_histogram.h colvarbias.h colvargrid.h + colvarparse.h colvarparams.h colvardeps.h ../lepton/include/Lepton.h \ + ../lepton/include/lepton/CompiledExpression.h \ + ../lepton/include/lepton/ExpressionTreeNode.h \ + ../lepton/include/lepton/windowsIncludes.h \ + ../lepton/include/lepton/CustomFunction.h \ + ../lepton/include/lepton/ExpressionProgram.h \ + ../lepton/include/lepton/ExpressionTreeNode.h \ + ../lepton/include/lepton/Operation.h \ + ../lepton/include/lepton/CustomFunction.h \ + ../lepton/include/lepton/Exception.h \ + ../lepton/include/lepton/ParsedExpression.h \ + ../lepton/include/lepton/Parser.h colvarbias_histogram.h colvarbias.h \ + colvargrid.h $(COLVARS_OBJ_DIR)colvarbias_histogram_reweight_amd.o: \ colvarbias_histogram_reweight_amd.cpp \ colvarbias_histogram_reweight_amd.h colvarbias_histogram.h colvarbias.h \ colvar.h colvarmodule.h colvars_version.h colvarvalue.h colvartypes.h \ - colvarparse.h colvarparams.h colvardeps.h lepton/include/Lepton.h \ - lepton/include/lepton/CompiledExpression.h \ - lepton/include/lepton/ExpressionTreeNode.h \ - lepton/include/lepton/windowsIncludes.h \ - lepton/include/lepton/CustomFunction.h \ - lepton/include/lepton/ExpressionProgram.h \ - lepton/include/lepton/ExpressionTreeNode.h \ - lepton/include/lepton/Operation.h lepton/include/lepton/CustomFunction.h \ - lepton/include/lepton/Exception.h \ - lepton/include/lepton/ParsedExpression.h lepton/include/lepton/Parser.h \ - colvargrid.h colvarproxy.h colvarproxy_tcl.h colvarproxy_volmaps.h + colvarparse.h colvarparams.h colvardeps.h ../lepton/include/Lepton.h \ + ../lepton/include/lepton/CompiledExpression.h \ + ../lepton/include/lepton/ExpressionTreeNode.h \ + ../lepton/include/lepton/windowsIncludes.h \ + ../lepton/include/lepton/CustomFunction.h \ + ../lepton/include/lepton/ExpressionProgram.h \ + ../lepton/include/lepton/ExpressionTreeNode.h \ + ../lepton/include/lepton/Operation.h \ + ../lepton/include/lepton/CustomFunction.h \ + ../lepton/include/lepton/Exception.h \ + ../lepton/include/lepton/ParsedExpression.h \ + ../lepton/include/lepton/Parser.h colvargrid.h colvarproxy.h \ + colvarproxy_tcl.h colvarproxy_volmaps.h $(COLVARS_OBJ_DIR)colvarbias_meta.o: colvarbias_meta.cpp colvarmodule.h \ colvars_version.h colvarproxy.h colvartypes.h 
colvarvalue.h \ colvarproxy_tcl.h colvarproxy_volmaps.h colvar.h colvarparse.h \ - colvarparams.h colvardeps.h lepton/include/Lepton.h \ - lepton/include/lepton/CompiledExpression.h \ - lepton/include/lepton/ExpressionTreeNode.h \ - lepton/include/lepton/windowsIncludes.h \ - lepton/include/lepton/CustomFunction.h \ - lepton/include/lepton/ExpressionProgram.h \ - lepton/include/lepton/ExpressionTreeNode.h \ - lepton/include/lepton/Operation.h lepton/include/lepton/CustomFunction.h \ - lepton/include/lepton/Exception.h \ - lepton/include/lepton/ParsedExpression.h lepton/include/lepton/Parser.h \ - colvarbias_meta.h colvarbias.h colvargrid.h + colvarparams.h colvardeps.h ../lepton/include/Lepton.h \ + ../lepton/include/lepton/CompiledExpression.h \ + ../lepton/include/lepton/ExpressionTreeNode.h \ + ../lepton/include/lepton/windowsIncludes.h \ + ../lepton/include/lepton/CustomFunction.h \ + ../lepton/include/lepton/ExpressionProgram.h \ + ../lepton/include/lepton/ExpressionTreeNode.h \ + ../lepton/include/lepton/Operation.h \ + ../lepton/include/lepton/CustomFunction.h \ + ../lepton/include/lepton/Exception.h \ + ../lepton/include/lepton/ParsedExpression.h \ + ../lepton/include/lepton/Parser.h colvarbias_meta.h colvarbias.h \ + colvargrid.h $(COLVARS_OBJ_DIR)colvarbias_restraint.o: colvarbias_restraint.cpp \ colvarmodule.h colvars_version.h colvarproxy.h colvartypes.h \ colvarvalue.h colvarproxy_tcl.h colvarproxy_volmaps.h \ colvarbias_restraint.h colvarbias.h colvar.h colvarparse.h \ - colvarparams.h colvardeps.h lepton/include/Lepton.h \ - lepton/include/lepton/CompiledExpression.h \ - lepton/include/lepton/ExpressionTreeNode.h \ - lepton/include/lepton/windowsIncludes.h \ - lepton/include/lepton/CustomFunction.h \ - lepton/include/lepton/ExpressionProgram.h \ - lepton/include/lepton/ExpressionTreeNode.h \ - lepton/include/lepton/Operation.h lepton/include/lepton/CustomFunction.h \ - lepton/include/lepton/Exception.h \ - lepton/include/lepton/ParsedExpression.h lepton/include/lepton/Parser.h + colvarparams.h colvardeps.h ../lepton/include/Lepton.h \ + ../lepton/include/lepton/CompiledExpression.h \ + ../lepton/include/lepton/ExpressionTreeNode.h \ + ../lepton/include/lepton/windowsIncludes.h \ + ../lepton/include/lepton/CustomFunction.h \ + ../lepton/include/lepton/ExpressionProgram.h \ + ../lepton/include/lepton/ExpressionTreeNode.h \ + ../lepton/include/lepton/Operation.h \ + ../lepton/include/lepton/CustomFunction.h \ + ../lepton/include/lepton/Exception.h \ + ../lepton/include/lepton/ParsedExpression.h \ + ../lepton/include/lepton/Parser.h $(COLVARS_OBJ_DIR)colvarcomp_alchlambda.o: colvarcomp_alchlambda.cpp \ colvarmodule.h colvars_version.h colvarvalue.h colvartypes.h \ colvarparse.h colvarparams.h colvar.h colvardeps.h \ - lepton/include/Lepton.h lepton/include/lepton/CompiledExpression.h \ - lepton/include/lepton/ExpressionTreeNode.h \ - lepton/include/lepton/windowsIncludes.h \ - lepton/include/lepton/CustomFunction.h \ - lepton/include/lepton/ExpressionProgram.h \ - lepton/include/lepton/ExpressionTreeNode.h \ - lepton/include/lepton/Operation.h lepton/include/lepton/CustomFunction.h \ - lepton/include/lepton/Exception.h \ - lepton/include/lepton/ParsedExpression.h lepton/include/lepton/Parser.h \ - colvarcomp.h colvaratoms.h colvarproxy.h colvarproxy_tcl.h \ - colvarproxy_volmaps.h colvar_arithmeticpath.h colvar_geometricpath.h + ../lepton/include/Lepton.h ../lepton/include/lepton/CompiledExpression.h \ + ../lepton/include/lepton/ExpressionTreeNode.h \ + 
../lepton/include/lepton/windowsIncludes.h \ + ../lepton/include/lepton/CustomFunction.h \ + ../lepton/include/lepton/ExpressionProgram.h \ + ../lepton/include/lepton/ExpressionTreeNode.h \ + ../lepton/include/lepton/Operation.h \ + ../lepton/include/lepton/CustomFunction.h \ + ../lepton/include/lepton/Exception.h \ + ../lepton/include/lepton/ParsedExpression.h \ + ../lepton/include/lepton/Parser.h colvarcomp.h colvaratoms.h \ + colvarproxy.h colvarproxy_tcl.h colvarproxy_volmaps.h \ + colvar_arithmeticpath.h colvar_geometricpath.h $(COLVARS_OBJ_DIR)colvarcomp_angles.o: colvarcomp_angles.cpp \ colvarmodule.h colvars_version.h colvar.h colvarvalue.h colvartypes.h \ - colvarparse.h colvarparams.h colvardeps.h lepton/include/Lepton.h \ - lepton/include/lepton/CompiledExpression.h \ - lepton/include/lepton/ExpressionTreeNode.h \ - lepton/include/lepton/windowsIncludes.h \ - lepton/include/lepton/CustomFunction.h \ - lepton/include/lepton/ExpressionProgram.h \ - lepton/include/lepton/ExpressionTreeNode.h \ - lepton/include/lepton/Operation.h lepton/include/lepton/CustomFunction.h \ - lepton/include/lepton/Exception.h \ - lepton/include/lepton/ParsedExpression.h lepton/include/lepton/Parser.h \ - colvarcomp.h colvaratoms.h colvarproxy.h colvarproxy_tcl.h \ - colvarproxy_volmaps.h colvar_arithmeticpath.h colvar_geometricpath.h + colvarparse.h colvarparams.h colvardeps.h ../lepton/include/Lepton.h \ + ../lepton/include/lepton/CompiledExpression.h \ + ../lepton/include/lepton/ExpressionTreeNode.h \ + ../lepton/include/lepton/windowsIncludes.h \ + ../lepton/include/lepton/CustomFunction.h \ + ../lepton/include/lepton/ExpressionProgram.h \ + ../lepton/include/lepton/ExpressionTreeNode.h \ + ../lepton/include/lepton/Operation.h \ + ../lepton/include/lepton/CustomFunction.h \ + ../lepton/include/lepton/Exception.h \ + ../lepton/include/lepton/ParsedExpression.h \ + ../lepton/include/lepton/Parser.h colvarcomp.h colvaratoms.h \ + colvarproxy.h colvarproxy_tcl.h colvarproxy_volmaps.h \ + colvar_arithmeticpath.h colvar_geometricpath.h $(COLVARS_OBJ_DIR)colvarcomp_apath.o: colvarcomp_apath.cpp colvarmodule.h \ colvars_version.h colvarvalue.h colvartypes.h colvarparse.h \ - colvarparams.h colvar.h colvardeps.h lepton/include/Lepton.h \ - lepton/include/lepton/CompiledExpression.h \ - lepton/include/lepton/ExpressionTreeNode.h \ - lepton/include/lepton/windowsIncludes.h \ - lepton/include/lepton/CustomFunction.h \ - lepton/include/lepton/ExpressionProgram.h \ - lepton/include/lepton/ExpressionTreeNode.h \ - lepton/include/lepton/Operation.h lepton/include/lepton/CustomFunction.h \ - lepton/include/lepton/Exception.h \ - lepton/include/lepton/ParsedExpression.h lepton/include/lepton/Parser.h \ - colvarcomp.h colvaratoms.h colvarproxy.h colvarproxy_tcl.h \ - colvarproxy_volmaps.h colvar_arithmeticpath.h colvar_geometricpath.h + colvarparams.h colvar.h colvardeps.h ../lepton/include/Lepton.h \ + ../lepton/include/lepton/CompiledExpression.h \ + ../lepton/include/lepton/ExpressionTreeNode.h \ + ../lepton/include/lepton/windowsIncludes.h \ + ../lepton/include/lepton/CustomFunction.h \ + ../lepton/include/lepton/ExpressionProgram.h \ + ../lepton/include/lepton/ExpressionTreeNode.h \ + ../lepton/include/lepton/Operation.h \ + ../lepton/include/lepton/CustomFunction.h \ + ../lepton/include/lepton/Exception.h \ + ../lepton/include/lepton/ParsedExpression.h \ + ../lepton/include/lepton/Parser.h colvarcomp.h colvaratoms.h \ + colvarproxy.h colvarproxy_tcl.h colvarproxy_volmaps.h \ + colvar_arithmeticpath.h 
colvar_geometricpath.h $(COLVARS_OBJ_DIR)colvarcomp_coordnums.o: colvarcomp_coordnums.cpp \ colvarmodule.h colvars_version.h colvarparse.h colvarvalue.h \ colvartypes.h colvarparams.h colvaratoms.h colvarproxy.h \ colvarproxy_tcl.h colvarproxy_volmaps.h colvardeps.h colvar.h \ - lepton/include/Lepton.h lepton/include/lepton/CompiledExpression.h \ - lepton/include/lepton/ExpressionTreeNode.h \ - lepton/include/lepton/windowsIncludes.h \ - lepton/include/lepton/CustomFunction.h \ - lepton/include/lepton/ExpressionProgram.h \ - lepton/include/lepton/ExpressionTreeNode.h \ - lepton/include/lepton/Operation.h lepton/include/lepton/CustomFunction.h \ - lepton/include/lepton/Exception.h \ - lepton/include/lepton/ParsedExpression.h lepton/include/lepton/Parser.h \ - colvarcomp.h colvar_arithmeticpath.h colvar_geometricpath.h + ../lepton/include/Lepton.h ../lepton/include/lepton/CompiledExpression.h \ + ../lepton/include/lepton/ExpressionTreeNode.h \ + ../lepton/include/lepton/windowsIncludes.h \ + ../lepton/include/lepton/CustomFunction.h \ + ../lepton/include/lepton/ExpressionProgram.h \ + ../lepton/include/lepton/ExpressionTreeNode.h \ + ../lepton/include/lepton/Operation.h \ + ../lepton/include/lepton/CustomFunction.h \ + ../lepton/include/lepton/Exception.h \ + ../lepton/include/lepton/ParsedExpression.h \ + ../lepton/include/lepton/Parser.h colvarcomp.h colvar_arithmeticpath.h \ + colvar_geometricpath.h $(COLVARS_OBJ_DIR)colvarcomp.o: colvarcomp.cpp colvarmodule.h \ colvars_version.h colvarvalue.h colvartypes.h colvar.h colvarparse.h \ - colvarparams.h colvardeps.h lepton/include/Lepton.h \ - lepton/include/lepton/CompiledExpression.h \ - lepton/include/lepton/ExpressionTreeNode.h \ - lepton/include/lepton/windowsIncludes.h \ - lepton/include/lepton/CustomFunction.h \ - lepton/include/lepton/ExpressionProgram.h \ - lepton/include/lepton/ExpressionTreeNode.h \ - lepton/include/lepton/Operation.h lepton/include/lepton/CustomFunction.h \ - lepton/include/lepton/Exception.h \ - lepton/include/lepton/ParsedExpression.h lepton/include/lepton/Parser.h \ - colvarcomp.h colvaratoms.h colvarproxy.h colvarproxy_tcl.h \ - colvarproxy_volmaps.h colvar_arithmeticpath.h colvar_geometricpath.h + colvarparams.h colvardeps.h ../lepton/include/Lepton.h \ + ../lepton/include/lepton/CompiledExpression.h \ + ../lepton/include/lepton/ExpressionTreeNode.h \ + ../lepton/include/lepton/windowsIncludes.h \ + ../lepton/include/lepton/CustomFunction.h \ + ../lepton/include/lepton/ExpressionProgram.h \ + ../lepton/include/lepton/ExpressionTreeNode.h \ + ../lepton/include/lepton/Operation.h \ + ../lepton/include/lepton/CustomFunction.h \ + ../lepton/include/lepton/Exception.h \ + ../lepton/include/lepton/ParsedExpression.h \ + ../lepton/include/lepton/Parser.h colvarcomp.h colvaratoms.h \ + colvarproxy.h colvarproxy_tcl.h colvarproxy_volmaps.h \ + colvar_arithmeticpath.h colvar_geometricpath.h $(COLVARS_OBJ_DIR)colvarcomp_distances.o: colvarcomp_distances.cpp \ colvarmodule.h colvars_version.h colvarvalue.h colvartypes.h \ colvarparse.h colvarparams.h colvar.h colvardeps.h \ - lepton/include/Lepton.h lepton/include/lepton/CompiledExpression.h \ - lepton/include/lepton/ExpressionTreeNode.h \ - lepton/include/lepton/windowsIncludes.h \ - lepton/include/lepton/CustomFunction.h \ - lepton/include/lepton/ExpressionProgram.h \ - lepton/include/lepton/ExpressionTreeNode.h \ - lepton/include/lepton/Operation.h lepton/include/lepton/CustomFunction.h \ - lepton/include/lepton/Exception.h \ - lepton/include/lepton/ParsedExpression.h 
lepton/include/lepton/Parser.h \ - colvarcomp.h colvaratoms.h colvarproxy.h colvarproxy_tcl.h \ - colvarproxy_volmaps.h colvar_arithmeticpath.h colvar_geometricpath.h + ../lepton/include/Lepton.h ../lepton/include/lepton/CompiledExpression.h \ + ../lepton/include/lepton/ExpressionTreeNode.h \ + ../lepton/include/lepton/windowsIncludes.h \ + ../lepton/include/lepton/CustomFunction.h \ + ../lepton/include/lepton/ExpressionProgram.h \ + ../lepton/include/lepton/ExpressionTreeNode.h \ + ../lepton/include/lepton/Operation.h \ + ../lepton/include/lepton/CustomFunction.h \ + ../lepton/include/lepton/Exception.h \ + ../lepton/include/lepton/ParsedExpression.h \ + ../lepton/include/lepton/Parser.h colvarcomp.h colvaratoms.h \ + colvarproxy.h colvarproxy_tcl.h colvarproxy_volmaps.h \ + colvar_arithmeticpath.h colvar_geometricpath.h $(COLVARS_OBJ_DIR)colvarcomp_gpath.o: colvarcomp_gpath.cpp colvarmodule.h \ colvars_version.h colvarvalue.h colvartypes.h colvarparse.h \ - colvarparams.h colvar.h colvardeps.h lepton/include/Lepton.h \ - lepton/include/lepton/CompiledExpression.h \ - lepton/include/lepton/ExpressionTreeNode.h \ - lepton/include/lepton/windowsIncludes.h \ - lepton/include/lepton/CustomFunction.h \ - lepton/include/lepton/ExpressionProgram.h \ - lepton/include/lepton/ExpressionTreeNode.h \ - lepton/include/lepton/Operation.h lepton/include/lepton/CustomFunction.h \ - lepton/include/lepton/Exception.h \ - lepton/include/lepton/ParsedExpression.h lepton/include/lepton/Parser.h \ - colvarcomp.h colvaratoms.h colvarproxy.h colvarproxy_tcl.h \ - colvarproxy_volmaps.h colvar_arithmeticpath.h colvar_geometricpath.h + colvarparams.h colvar.h colvardeps.h ../lepton/include/Lepton.h \ + ../lepton/include/lepton/CompiledExpression.h \ + ../lepton/include/lepton/ExpressionTreeNode.h \ + ../lepton/include/lepton/windowsIncludes.h \ + ../lepton/include/lepton/CustomFunction.h \ + ../lepton/include/lepton/ExpressionProgram.h \ + ../lepton/include/lepton/ExpressionTreeNode.h \ + ../lepton/include/lepton/Operation.h \ + ../lepton/include/lepton/CustomFunction.h \ + ../lepton/include/lepton/Exception.h \ + ../lepton/include/lepton/ParsedExpression.h \ + ../lepton/include/lepton/Parser.h colvarcomp.h colvaratoms.h \ + colvarproxy.h colvarproxy_tcl.h colvarproxy_volmaps.h \ + colvar_arithmeticpath.h colvar_geometricpath.h $(COLVARS_OBJ_DIR)colvarcomp_neuralnetwork.o: \ colvarcomp_neuralnetwork.cpp colvarmodule.h colvars_version.h \ colvarvalue.h colvartypes.h colvarparse.h colvarparams.h colvar.h \ - colvardeps.h lepton/include/Lepton.h \ - lepton/include/lepton/CompiledExpression.h \ - lepton/include/lepton/ExpressionTreeNode.h \ - lepton/include/lepton/windowsIncludes.h \ - lepton/include/lepton/CustomFunction.h \ - lepton/include/lepton/ExpressionProgram.h \ - lepton/include/lepton/ExpressionTreeNode.h \ - lepton/include/lepton/Operation.h lepton/include/lepton/CustomFunction.h \ - lepton/include/lepton/Exception.h \ - lepton/include/lepton/ParsedExpression.h lepton/include/lepton/Parser.h \ - colvarcomp.h colvaratoms.h colvarproxy.h colvarproxy_tcl.h \ - colvarproxy_volmaps.h colvar_arithmeticpath.h colvar_geometricpath.h \ + colvardeps.h ../lepton/include/Lepton.h \ + ../lepton/include/lepton/CompiledExpression.h \ + ../lepton/include/lepton/ExpressionTreeNode.h \ + ../lepton/include/lepton/windowsIncludes.h \ + ../lepton/include/lepton/CustomFunction.h \ + ../lepton/include/lepton/ExpressionProgram.h \ + ../lepton/include/lepton/ExpressionTreeNode.h \ + ../lepton/include/lepton/Operation.h \ + 
../lepton/include/lepton/CustomFunction.h \ + ../lepton/include/lepton/Exception.h \ + ../lepton/include/lepton/ParsedExpression.h \ + ../lepton/include/lepton/Parser.h colvarcomp.h colvaratoms.h \ + colvarproxy.h colvarproxy_tcl.h colvarproxy_volmaps.h \ + colvar_arithmeticpath.h colvar_geometricpath.h \ colvar_neuralnetworkcompute.h $(COLVARS_OBJ_DIR)colvarcomp_combination.o: colvarcomp_combination.cpp \ colvarcomp.h colvarmodule.h colvars_version.h colvar.h colvarvalue.h \ colvartypes.h colvarparse.h colvarparams.h colvardeps.h \ - lepton/include/Lepton.h lepton/include/lepton/CompiledExpression.h \ - lepton/include/lepton/ExpressionTreeNode.h \ - lepton/include/lepton/windowsIncludes.h \ - lepton/include/lepton/CustomFunction.h \ - lepton/include/lepton/ExpressionProgram.h \ - lepton/include/lepton/ExpressionTreeNode.h \ - lepton/include/lepton/Operation.h lepton/include/lepton/CustomFunction.h \ - lepton/include/lepton/Exception.h \ - lepton/include/lepton/ParsedExpression.h lepton/include/lepton/Parser.h \ - colvaratoms.h colvarproxy.h colvarproxy_tcl.h colvarproxy_volmaps.h \ - colvar_arithmeticpath.h colvar_geometricpath.h + ../lepton/include/Lepton.h ../lepton/include/lepton/CompiledExpression.h \ + ../lepton/include/lepton/ExpressionTreeNode.h \ + ../lepton/include/lepton/windowsIncludes.h \ + ../lepton/include/lepton/CustomFunction.h \ + ../lepton/include/lepton/ExpressionProgram.h \ + ../lepton/include/lepton/ExpressionTreeNode.h \ + ../lepton/include/lepton/Operation.h \ + ../lepton/include/lepton/CustomFunction.h \ + ../lepton/include/lepton/Exception.h \ + ../lepton/include/lepton/ParsedExpression.h \ + ../lepton/include/lepton/Parser.h colvaratoms.h colvarproxy.h \ + colvarproxy_tcl.h colvarproxy_volmaps.h colvar_arithmeticpath.h \ + colvar_geometricpath.h $(COLVARS_OBJ_DIR)colvarcomp_protein.o: colvarcomp_protein.cpp \ colvarmodule.h colvars_version.h colvarvalue.h colvartypes.h \ colvarparse.h colvarparams.h colvar.h colvardeps.h \ - lepton/include/Lepton.h lepton/include/lepton/CompiledExpression.h \ - lepton/include/lepton/ExpressionTreeNode.h \ - lepton/include/lepton/windowsIncludes.h \ - lepton/include/lepton/CustomFunction.h \ - lepton/include/lepton/ExpressionProgram.h \ - lepton/include/lepton/ExpressionTreeNode.h \ - lepton/include/lepton/Operation.h lepton/include/lepton/CustomFunction.h \ - lepton/include/lepton/Exception.h \ - lepton/include/lepton/ParsedExpression.h lepton/include/lepton/Parser.h \ - colvarcomp.h colvaratoms.h colvarproxy.h colvarproxy_tcl.h \ - colvarproxy_volmaps.h colvar_arithmeticpath.h colvar_geometricpath.h + ../lepton/include/Lepton.h ../lepton/include/lepton/CompiledExpression.h \ + ../lepton/include/lepton/ExpressionTreeNode.h \ + ../lepton/include/lepton/windowsIncludes.h \ + ../lepton/include/lepton/CustomFunction.h \ + ../lepton/include/lepton/ExpressionProgram.h \ + ../lepton/include/lepton/ExpressionTreeNode.h \ + ../lepton/include/lepton/Operation.h \ + ../lepton/include/lepton/CustomFunction.h \ + ../lepton/include/lepton/Exception.h \ + ../lepton/include/lepton/ParsedExpression.h \ + ../lepton/include/lepton/Parser.h colvarcomp.h colvaratoms.h \ + colvarproxy.h colvarproxy_tcl.h colvarproxy_volmaps.h \ + colvar_arithmeticpath.h colvar_geometricpath.h $(COLVARS_OBJ_DIR)colvarcomp_rotations.o: colvarcomp_rotations.cpp \ colvarmodule.h colvars_version.h colvarvalue.h colvartypes.h \ colvarparse.h colvarparams.h colvar.h colvardeps.h \ - lepton/include/Lepton.h lepton/include/lepton/CompiledExpression.h \ - 
lepton/include/lepton/ExpressionTreeNode.h \ - lepton/include/lepton/windowsIncludes.h \ - lepton/include/lepton/CustomFunction.h \ - lepton/include/lepton/ExpressionProgram.h \ - lepton/include/lepton/ExpressionTreeNode.h \ - lepton/include/lepton/Operation.h lepton/include/lepton/CustomFunction.h \ - lepton/include/lepton/Exception.h \ - lepton/include/lepton/ParsedExpression.h lepton/include/lepton/Parser.h \ - colvarcomp.h colvaratoms.h colvarproxy.h colvarproxy_tcl.h \ - colvarproxy_volmaps.h colvar_arithmeticpath.h colvar_geometricpath.h + ../lepton/include/Lepton.h ../lepton/include/lepton/CompiledExpression.h \ + ../lepton/include/lepton/ExpressionTreeNode.h \ + ../lepton/include/lepton/windowsIncludes.h \ + ../lepton/include/lepton/CustomFunction.h \ + ../lepton/include/lepton/ExpressionProgram.h \ + ../lepton/include/lepton/ExpressionTreeNode.h \ + ../lepton/include/lepton/Operation.h \ + ../lepton/include/lepton/CustomFunction.h \ + ../lepton/include/lepton/Exception.h \ + ../lepton/include/lepton/ParsedExpression.h \ + ../lepton/include/lepton/Parser.h colvarcomp.h colvaratoms.h \ + colvarproxy.h colvarproxy_tcl.h colvarproxy_volmaps.h \ + colvar_arithmeticpath.h colvar_geometricpath.h $(COLVARS_OBJ_DIR)colvarcomp_volmaps.o: colvarcomp_volmaps.cpp \ colvarmodule.h colvars_version.h colvarvalue.h colvartypes.h \ colvarparse.h colvarparams.h colvar.h colvardeps.h \ - lepton/include/Lepton.h lepton/include/lepton/CompiledExpression.h \ - lepton/include/lepton/ExpressionTreeNode.h \ - lepton/include/lepton/windowsIncludes.h \ - lepton/include/lepton/CustomFunction.h \ - lepton/include/lepton/ExpressionProgram.h \ - lepton/include/lepton/ExpressionTreeNode.h \ - lepton/include/lepton/Operation.h lepton/include/lepton/CustomFunction.h \ - lepton/include/lepton/Exception.h \ - lepton/include/lepton/ParsedExpression.h lepton/include/lepton/Parser.h \ - colvarcomp.h colvaratoms.h colvarproxy.h colvarproxy_tcl.h \ - colvarproxy_volmaps.h colvar_arithmeticpath.h colvar_geometricpath.h + ../lepton/include/Lepton.h ../lepton/include/lepton/CompiledExpression.h \ + ../lepton/include/lepton/ExpressionTreeNode.h \ + ../lepton/include/lepton/windowsIncludes.h \ + ../lepton/include/lepton/CustomFunction.h \ + ../lepton/include/lepton/ExpressionProgram.h \ + ../lepton/include/lepton/ExpressionTreeNode.h \ + ../lepton/include/lepton/Operation.h \ + ../lepton/include/lepton/CustomFunction.h \ + ../lepton/include/lepton/Exception.h \ + ../lepton/include/lepton/ParsedExpression.h \ + ../lepton/include/lepton/Parser.h colvarcomp.h colvaratoms.h \ + colvarproxy.h colvarproxy_tcl.h colvarproxy_volmaps.h \ + colvar_arithmeticpath.h colvar_geometricpath.h $(COLVARS_OBJ_DIR)colvar.o: colvar.cpp colvarmodule.h colvars_version.h \ colvarvalue.h colvartypes.h colvarparse.h colvarparams.h colvar.h \ - colvardeps.h lepton/include/Lepton.h \ - lepton/include/lepton/CompiledExpression.h \ - lepton/include/lepton/ExpressionTreeNode.h \ - lepton/include/lepton/windowsIncludes.h \ - lepton/include/lepton/CustomFunction.h \ - lepton/include/lepton/ExpressionProgram.h \ - lepton/include/lepton/ExpressionTreeNode.h \ - lepton/include/lepton/Operation.h lepton/include/lepton/CustomFunction.h \ - lepton/include/lepton/Exception.h \ - lepton/include/lepton/ParsedExpression.h lepton/include/lepton/Parser.h \ - colvarcomp.h colvaratoms.h colvarproxy.h colvarproxy_tcl.h \ - colvarproxy_volmaps.h colvar_arithmeticpath.h colvar_geometricpath.h \ - colvarscript.h colvarbias.h colvarscript_commands.h \ - 
colvarscript_commands_colvar.h colvarscript_commands_bias.h + colvardeps.h ../lepton/include/Lepton.h \ + ../lepton/include/lepton/CompiledExpression.h \ + ../lepton/include/lepton/ExpressionTreeNode.h \ + ../lepton/include/lepton/windowsIncludes.h \ + ../lepton/include/lepton/CustomFunction.h \ + ../lepton/include/lepton/ExpressionProgram.h \ + ../lepton/include/lepton/ExpressionTreeNode.h \ + ../lepton/include/lepton/Operation.h \ + ../lepton/include/lepton/CustomFunction.h \ + ../lepton/include/lepton/Exception.h \ + ../lepton/include/lepton/ParsedExpression.h \ + ../lepton/include/lepton/Parser.h colvarcomp.h colvaratoms.h \ + colvarproxy.h colvarproxy_tcl.h colvarproxy_volmaps.h \ + colvar_arithmeticpath.h colvar_geometricpath.h colvarscript.h \ + colvarbias.h colvarscript_commands.h colvarscript_commands_colvar.h \ + colvarscript_commands_bias.h $(COLVARS_OBJ_DIR)colvardeps.o: colvardeps.cpp colvarmodule.h \ colvars_version.h colvarproxy.h colvartypes.h colvarvalue.h \ colvarproxy_tcl.h colvarproxy_volmaps.h colvardeps.h colvarparse.h \ colvarparams.h $(COLVARS_OBJ_DIR)colvargrid.o: colvargrid.cpp colvarmodule.h \ colvars_version.h colvarvalue.h colvartypes.h colvarparse.h \ - colvarparams.h colvar.h colvardeps.h lepton/include/Lepton.h \ - lepton/include/lepton/CompiledExpression.h \ - lepton/include/lepton/ExpressionTreeNode.h \ - lepton/include/lepton/windowsIncludes.h \ - lepton/include/lepton/CustomFunction.h \ - lepton/include/lepton/ExpressionProgram.h \ - lepton/include/lepton/ExpressionTreeNode.h \ - lepton/include/lepton/Operation.h lepton/include/lepton/CustomFunction.h \ - lepton/include/lepton/Exception.h \ - lepton/include/lepton/ParsedExpression.h lepton/include/lepton/Parser.h \ - colvarcomp.h colvaratoms.h colvarproxy.h colvarproxy_tcl.h \ - colvarproxy_volmaps.h colvar_arithmeticpath.h colvar_geometricpath.h \ - colvargrid.h + colvarparams.h colvar.h colvardeps.h ../lepton/include/Lepton.h \ + ../lepton/include/lepton/CompiledExpression.h \ + ../lepton/include/lepton/ExpressionTreeNode.h \ + ../lepton/include/lepton/windowsIncludes.h \ + ../lepton/include/lepton/CustomFunction.h \ + ../lepton/include/lepton/ExpressionProgram.h \ + ../lepton/include/lepton/ExpressionTreeNode.h \ + ../lepton/include/lepton/Operation.h \ + ../lepton/include/lepton/CustomFunction.h \ + ../lepton/include/lepton/Exception.h \ + ../lepton/include/lepton/ParsedExpression.h \ + ../lepton/include/lepton/Parser.h colvarcomp.h colvaratoms.h \ + colvarproxy.h colvarproxy_tcl.h colvarproxy_volmaps.h \ + colvar_arithmeticpath.h colvar_geometricpath.h colvargrid.h $(COLVARS_OBJ_DIR)colvarmodule.o: colvarmodule.cpp colvarmodule.h \ colvars_version.h colvarparse.h colvarvalue.h colvartypes.h \ colvarparams.h colvarproxy.h colvarproxy_tcl.h colvarproxy_volmaps.h \ - colvar.h colvardeps.h lepton/include/Lepton.h \ - lepton/include/lepton/CompiledExpression.h \ - lepton/include/lepton/ExpressionTreeNode.h \ - lepton/include/lepton/windowsIncludes.h \ - lepton/include/lepton/CustomFunction.h \ - lepton/include/lepton/ExpressionProgram.h \ - lepton/include/lepton/ExpressionTreeNode.h \ - lepton/include/lepton/Operation.h lepton/include/lepton/CustomFunction.h \ - lepton/include/lepton/Exception.h \ - lepton/include/lepton/ParsedExpression.h lepton/include/lepton/Parser.h \ - colvarbias.h colvarbias_abf.h colvargrid.h colvar_UIestimator.h \ - colvarbias_alb.h colvarbias_histogram.h \ - colvarbias_histogram_reweight_amd.h colvarbias_meta.h \ - colvarbias_restraint.h colvarscript.h colvarscript_commands.h \ 
- colvarscript_commands_colvar.h colvarscript_commands_bias.h \ - colvaratoms.h colvarcomp.h colvar_arithmeticpath.h \ - colvar_geometricpath.h colvarmodule_refs.h + colvar.h colvardeps.h ../lepton/include/Lepton.h \ + ../lepton/include/lepton/CompiledExpression.h \ + ../lepton/include/lepton/ExpressionTreeNode.h \ + ../lepton/include/lepton/windowsIncludes.h \ + ../lepton/include/lepton/CustomFunction.h \ + ../lepton/include/lepton/ExpressionProgram.h \ + ../lepton/include/lepton/ExpressionTreeNode.h \ + ../lepton/include/lepton/Operation.h \ + ../lepton/include/lepton/CustomFunction.h \ + ../lepton/include/lepton/Exception.h \ + ../lepton/include/lepton/ParsedExpression.h \ + ../lepton/include/lepton/Parser.h colvarbias.h colvarbias_abf.h \ + colvargrid.h colvar_UIestimator.h colvarbias_alb.h \ + colvarbias_histogram.h colvarbias_histogram_reweight_amd.h \ + colvarbias_meta.h colvarbias_restraint.h colvarscript.h \ + colvarscript_commands.h colvarscript_commands_colvar.h \ + colvarscript_commands_bias.h colvaratoms.h colvarcomp.h \ + colvar_arithmeticpath.h colvar_geometricpath.h colvarmodule_refs.h $(COLVARS_OBJ_DIR)colvarparams.o: colvarparams.cpp colvarmodule.h \ colvars_version.h colvarvalue.h colvartypes.h colvarparams.h $(COLVARS_OBJ_DIR)colvarparse.o: colvarparse.cpp colvarmodule.h \ @@ -335,17 +375,19 @@ $(COLVARS_OBJ_DIR)colvarproxy.o: colvarproxy.cpp colvarmodule.h \ colvars_version.h colvarproxy.h colvartypes.h colvarvalue.h \ colvarproxy_tcl.h colvarproxy_volmaps.h colvarscript.h colvarbias.h \ colvar.h colvarparse.h colvarparams.h colvardeps.h \ - lepton/include/Lepton.h lepton/include/lepton/CompiledExpression.h \ - lepton/include/lepton/ExpressionTreeNode.h \ - lepton/include/lepton/windowsIncludes.h \ - lepton/include/lepton/CustomFunction.h \ - lepton/include/lepton/ExpressionProgram.h \ - lepton/include/lepton/ExpressionTreeNode.h \ - lepton/include/lepton/Operation.h lepton/include/lepton/CustomFunction.h \ - lepton/include/lepton/Exception.h \ - lepton/include/lepton/ParsedExpression.h lepton/include/lepton/Parser.h \ - colvarscript_commands.h colvarscript_commands_colvar.h \ - colvarscript_commands_bias.h colvaratoms.h colvarmodule_utils.h + ../lepton/include/Lepton.h ../lepton/include/lepton/CompiledExpression.h \ + ../lepton/include/lepton/ExpressionTreeNode.h \ + ../lepton/include/lepton/windowsIncludes.h \ + ../lepton/include/lepton/CustomFunction.h \ + ../lepton/include/lepton/ExpressionProgram.h \ + ../lepton/include/lepton/ExpressionTreeNode.h \ + ../lepton/include/lepton/Operation.h \ + ../lepton/include/lepton/CustomFunction.h \ + ../lepton/include/lepton/Exception.h \ + ../lepton/include/lepton/ParsedExpression.h \ + ../lepton/include/lepton/Parser.h colvarscript_commands.h \ + colvarscript_commands_colvar.h colvarscript_commands_bias.h \ + colvaratoms.h colvarmodule_utils.h $(COLVARS_OBJ_DIR)colvarproxy_replicas.o: colvarproxy_replicas.cpp \ colvarmodule.h colvars_version.h colvarproxy.h colvartypes.h \ colvarvalue.h colvarproxy_tcl.h colvarproxy_volmaps.h @@ -360,64 +402,68 @@ $(COLVARS_OBJ_DIR)colvarscript.o: colvarscript.cpp colvarproxy.h \ colvarmodule.h colvars_version.h colvartypes.h colvarvalue.h \ colvarproxy_tcl.h colvarproxy_volmaps.h colvardeps.h colvarparse.h \ colvarparams.h colvarscript.h colvarbias.h colvar.h \ - lepton/include/Lepton.h lepton/include/lepton/CompiledExpression.h \ - lepton/include/lepton/ExpressionTreeNode.h \ - lepton/include/lepton/windowsIncludes.h \ - lepton/include/lepton/CustomFunction.h \ - 
lepton/include/lepton/ExpressionProgram.h \ - lepton/include/lepton/ExpressionTreeNode.h \ - lepton/include/lepton/Operation.h lepton/include/lepton/CustomFunction.h \ - lepton/include/lepton/Exception.h \ - lepton/include/lepton/ParsedExpression.h lepton/include/lepton/Parser.h \ - colvarscript_commands.h colvarscript_commands_colvar.h \ - colvarscript_commands_bias.h + ../lepton/include/Lepton.h ../lepton/include/lepton/CompiledExpression.h \ + ../lepton/include/lepton/ExpressionTreeNode.h \ + ../lepton/include/lepton/windowsIncludes.h \ + ../lepton/include/lepton/CustomFunction.h \ + ../lepton/include/lepton/ExpressionProgram.h \ + ../lepton/include/lepton/ExpressionTreeNode.h \ + ../lepton/include/lepton/Operation.h \ + ../lepton/include/lepton/CustomFunction.h \ + ../lepton/include/lepton/Exception.h \ + ../lepton/include/lepton/ParsedExpression.h \ + ../lepton/include/lepton/Parser.h colvarscript_commands.h \ + colvarscript_commands_colvar.h colvarscript_commands_bias.h $(COLVARS_OBJ_DIR)colvarscript_commands.o: colvarscript_commands.cpp \ colvarproxy.h colvarmodule.h colvars_version.h colvartypes.h \ colvarvalue.h colvarproxy_tcl.h colvarproxy_volmaps.h colvardeps.h \ colvarparse.h colvarparams.h colvarscript.h colvarbias.h colvar.h \ - lepton/include/Lepton.h lepton/include/lepton/CompiledExpression.h \ - lepton/include/lepton/ExpressionTreeNode.h \ - lepton/include/lepton/windowsIncludes.h \ - lepton/include/lepton/CustomFunction.h \ - lepton/include/lepton/ExpressionProgram.h \ - lepton/include/lepton/ExpressionTreeNode.h \ - lepton/include/lepton/Operation.h lepton/include/lepton/CustomFunction.h \ - lepton/include/lepton/Exception.h \ - lepton/include/lepton/ParsedExpression.h lepton/include/lepton/Parser.h \ - colvarscript_commands.h colvarscript_commands_colvar.h \ - colvarscript_commands_bias.h + ../lepton/include/Lepton.h ../lepton/include/lepton/CompiledExpression.h \ + ../lepton/include/lepton/ExpressionTreeNode.h \ + ../lepton/include/lepton/windowsIncludes.h \ + ../lepton/include/lepton/CustomFunction.h \ + ../lepton/include/lepton/ExpressionProgram.h \ + ../lepton/include/lepton/ExpressionTreeNode.h \ + ../lepton/include/lepton/Operation.h \ + ../lepton/include/lepton/CustomFunction.h \ + ../lepton/include/lepton/Exception.h \ + ../lepton/include/lepton/ParsedExpression.h \ + ../lepton/include/lepton/Parser.h colvarscript_commands.h \ + colvarscript_commands_colvar.h colvarscript_commands_bias.h $(COLVARS_OBJ_DIR)colvarscript_commands_bias.o: \ colvarscript_commands_bias.cpp colvarproxy.h colvarmodule.h \ colvars_version.h colvartypes.h colvarvalue.h colvarproxy_tcl.h \ colvarproxy_volmaps.h colvardeps.h colvarparse.h colvarparams.h \ - colvarscript.h colvarbias.h colvar.h lepton/include/Lepton.h \ - lepton/include/lepton/CompiledExpression.h \ - lepton/include/lepton/ExpressionTreeNode.h \ - lepton/include/lepton/windowsIncludes.h \ - lepton/include/lepton/CustomFunction.h \ - lepton/include/lepton/ExpressionProgram.h \ - lepton/include/lepton/ExpressionTreeNode.h \ - lepton/include/lepton/Operation.h lepton/include/lepton/CustomFunction.h \ - lepton/include/lepton/Exception.h \ - lepton/include/lepton/ParsedExpression.h lepton/include/lepton/Parser.h \ - colvarscript_commands.h colvarscript_commands_colvar.h \ - colvarscript_commands_bias.h + colvarscript.h colvarbias.h colvar.h ../lepton/include/Lepton.h \ + ../lepton/include/lepton/CompiledExpression.h \ + ../lepton/include/lepton/ExpressionTreeNode.h \ + ../lepton/include/lepton/windowsIncludes.h \ + 
../lepton/include/lepton/CustomFunction.h \ + ../lepton/include/lepton/ExpressionProgram.h \ + ../lepton/include/lepton/ExpressionTreeNode.h \ + ../lepton/include/lepton/Operation.h \ + ../lepton/include/lepton/CustomFunction.h \ + ../lepton/include/lepton/Exception.h \ + ../lepton/include/lepton/ParsedExpression.h \ + ../lepton/include/lepton/Parser.h colvarscript_commands.h \ + colvarscript_commands_colvar.h colvarscript_commands_bias.h $(COLVARS_OBJ_DIR)colvarscript_commands_colvar.o: \ colvarscript_commands_colvar.cpp colvarproxy.h colvarmodule.h \ colvars_version.h colvartypes.h colvarvalue.h colvarproxy_tcl.h \ colvarproxy_volmaps.h colvardeps.h colvarparse.h colvarparams.h \ - colvarscript.h colvarbias.h colvar.h lepton/include/Lepton.h \ - lepton/include/lepton/CompiledExpression.h \ - lepton/include/lepton/ExpressionTreeNode.h \ - lepton/include/lepton/windowsIncludes.h \ - lepton/include/lepton/CustomFunction.h \ - lepton/include/lepton/ExpressionProgram.h \ - lepton/include/lepton/ExpressionTreeNode.h \ - lepton/include/lepton/Operation.h lepton/include/lepton/CustomFunction.h \ - lepton/include/lepton/Exception.h \ - lepton/include/lepton/ParsedExpression.h lepton/include/lepton/Parser.h \ - colvarscript_commands.h colvarscript_commands_colvar.h \ - colvarscript_commands_bias.h + colvarscript.h colvarbias.h colvar.h ../lepton/include/Lepton.h \ + ../lepton/include/lepton/CompiledExpression.h \ + ../lepton/include/lepton/ExpressionTreeNode.h \ + ../lepton/include/lepton/windowsIncludes.h \ + ../lepton/include/lepton/CustomFunction.h \ + ../lepton/include/lepton/ExpressionProgram.h \ + ../lepton/include/lepton/ExpressionTreeNode.h \ + ../lepton/include/lepton/Operation.h \ + ../lepton/include/lepton/CustomFunction.h \ + ../lepton/include/lepton/Exception.h \ + ../lepton/include/lepton/ParsedExpression.h \ + ../lepton/include/lepton/Parser.h colvarscript_commands.h \ + colvarscript_commands_colvar.h colvarscript_commands_bias.h $(COLVARS_OBJ_DIR)colvartypes.o: colvartypes.cpp colvarmodule.h \ colvars_version.h colvartypes.h colvarparse.h colvarvalue.h \ colvarparams.h ../../src/math_eigen_impl.h @@ -425,14 +471,15 @@ $(COLVARS_OBJ_DIR)colvarvalue.o: colvarvalue.cpp colvarmodule.h \ colvars_version.h colvarvalue.h colvartypes.h $(COLVARS_OBJ_DIR)colvar_neuralnetworkcompute.o: \ colvar_neuralnetworkcompute.cpp colvar_neuralnetworkcompute.h \ - lepton/include/Lepton.h lepton/include/lepton/CompiledExpression.h \ - lepton/include/lepton/ExpressionTreeNode.h \ - lepton/include/lepton/windowsIncludes.h \ - lepton/include/lepton/CustomFunction.h \ - lepton/include/lepton/ExpressionProgram.h \ - lepton/include/lepton/ExpressionTreeNode.h \ - lepton/include/lepton/Operation.h lepton/include/lepton/CustomFunction.h \ - lepton/include/lepton/Exception.h \ - lepton/include/lepton/ParsedExpression.h lepton/include/lepton/Parser.h \ - colvarparse.h colvarmodule.h colvars_version.h colvarvalue.h \ - colvartypes.h colvarparams.h + ../lepton/include/Lepton.h ../lepton/include/lepton/CompiledExpression.h \ + ../lepton/include/lepton/ExpressionTreeNode.h \ + ../lepton/include/lepton/windowsIncludes.h \ + ../lepton/include/lepton/CustomFunction.h \ + ../lepton/include/lepton/ExpressionProgram.h \ + ../lepton/include/lepton/ExpressionTreeNode.h \ + ../lepton/include/lepton/Operation.h \ + ../lepton/include/lepton/CustomFunction.h \ + ../lepton/include/lepton/Exception.h \ + ../lepton/include/lepton/ParsedExpression.h \ + ../lepton/include/lepton/Parser.h colvarparse.h colvarmodule.h \ + 
colvars_version.h colvarvalue.h colvartypes.h colvarparams.h diff --git a/lib/colvars/Makefile.lepton.deps b/lib/colvars/Makefile.lepton.deps deleted file mode 100644 index 4546339de6..0000000000 --- a/lib/colvars/Makefile.lepton.deps +++ /dev/null @@ -1,50 +0,0 @@ - -$(COLVARS_OBJ_DIR)CompiledExpression.o: lepton/src/CompiledExpression.cpp \ - lepton/include/lepton/CompiledExpression.h \ - lepton/include/lepton/ExpressionTreeNode.h \ - lepton/include/lepton/windowsIncludes.h \ - lepton/include/lepton/Operation.h lepton/include/lepton/CustomFunction.h \ - lepton/include/lepton/Exception.h \ - lepton/include/lepton/ParsedExpression.h -$(COLVARS_OBJ_DIR)CompiledVectorExpression.o: \ - lepton/src/CompiledVectorExpression.cpp \ - lepton/include/lepton/CompiledVectorExpression.h \ - lepton/include/lepton/ExpressionTreeNode.h \ - lepton/include/lepton/windowsIncludes.h \ - lepton/include/lepton/Operation.h lepton/include/lepton/CustomFunction.h \ - lepton/include/lepton/Exception.h \ - lepton/include/lepton/ParsedExpression.h -$(COLVARS_OBJ_DIR)ExpressionProgram.o: lepton/src/ExpressionProgram.cpp \ - lepton/include/lepton/ExpressionProgram.h \ - lepton/include/lepton/ExpressionTreeNode.h \ - lepton/include/lepton/windowsIncludes.h \ - lepton/include/lepton/Operation.h lepton/include/lepton/CustomFunction.h \ - lepton/include/lepton/Exception.h \ - lepton/include/lepton/ParsedExpression.h -$(COLVARS_OBJ_DIR)ExpressionTreeNode.o: lepton/src/ExpressionTreeNode.cpp \ - lepton/include/lepton/ExpressionTreeNode.h \ - lepton/include/lepton/windowsIncludes.h \ - lepton/include/lepton/Exception.h lepton/include/lepton/Operation.h \ - lepton/include/lepton/CustomFunction.h lepton/include/lepton/Exception.h -$(COLVARS_OBJ_DIR)Operation.o: lepton/src/Operation.cpp \ - lepton/include/lepton/Operation.h \ - lepton/include/lepton/windowsIncludes.h \ - lepton/include/lepton/CustomFunction.h lepton/include/lepton/Exception.h \ - lepton/include/lepton/ExpressionTreeNode.h lepton/src/MSVC_erfc.h -$(COLVARS_OBJ_DIR)ParsedExpression.o: lepton/src/ParsedExpression.cpp \ - lepton/include/lepton/ParsedExpression.h \ - lepton/include/lepton/ExpressionTreeNode.h \ - lepton/include/lepton/windowsIncludes.h \ - lepton/include/lepton/CompiledExpression.h \ - lepton/include/lepton/CompiledVectorExpression.h \ - lepton/include/lepton/ExpressionProgram.h \ - lepton/include/lepton/Operation.h lepton/include/lepton/CustomFunction.h \ - lepton/include/lepton/Exception.h -$(COLVARS_OBJ_DIR)Parser.o: lepton/src/Parser.cpp \ - lepton/include/lepton/Parser.h lepton/include/lepton/windowsIncludes.h \ - lepton/include/lepton/CustomFunction.h lepton/include/lepton/Exception.h \ - lepton/include/lepton/ExpressionTreeNode.h \ - lepton/include/lepton/Operation.h lepton/include/lepton/CustomFunction.h \ - lepton/include/lepton/Exception.h \ - lepton/include/lepton/ParsedExpression.h \ - lepton/include/lepton/ExpressionTreeNode.h diff --git a/lib/colvars/README b/lib/colvars/README index eeba557ce3..cd1e08b6de 100644 --- a/lib/colvars/README +++ b/lib/colvars/README @@ -35,39 +35,32 @@ The reference article is: The Colvars library can be built for the most part with all major versions of the C++ language. -A few of the most recent features require C++11 support. In particular, the -library is optionally built together with the -"Lepton"_https://simtk.org/projects/lepton library, a copy of which is also -included in the LAMMPS distribution. 
Lepton implements the -"customFunction"_http://colvars.github.io/colvars-refman-lammps/colvars-refman-lammps.html#colvar|customFunction -feature, and requires C++11 support. - -See "here"_https://colvars.github.io/README-c++11.html for a detailed list of -C++11-only features. - +A few of the most recent features require C++11 support, which is also required +by LAMMPS, so no additional notes are needed. ## How to build (CMake) This is the recommended build recipe: no additional settings are normally needed besides "-D PKG_COLVARS=yes". -Building and linking of Lepton (or other C++11-only features) is enabled -automatically when compilation is carried out with C++11 support, and disabled -otherwise. Optionally, Lepton build may be manually controlled with the flag -"-D COLVARS_LEPTON=yes|no". - +Linking to the Lepton library, which is also used by the LEPTON LAMMPS package, +is enabled automatically. Optionally, support for Lepton within Colvars may +be manually controlled with the CMake setting "-D COLVARS_LEPTON=yes|no". ## How to build (traditional make) -Before building LAMMPS, one must build the Colvars library in lib/colvars. +Before building LAMMPS, one must build the Colvars library in lib/colvars +and the Lepton library in lib/lepton. For building Lepton please see the +README.md file in the lib/lepton folder. -This can be done manually in the same folder by using or adapting one of the -provided Makefiles: for example, Makefile.g++ for the GNU compiler. +Building the Colvars library can be done manually in the respective +folders by using or adapting one of the provided Makefiles: for example, +Makefile.g++ for the GNU compiler. In general, it is safer to use build setting consistent with the rest of LAMMPS. This is best carried out from the LAMMPS src directory using a -command like these, which simply invoke the lib/colvars/Install.py script with -the specified args: +command like one of these, which simply invokes the lib/colvars/Install.py +script with the specified args: make lib-colvars # print help message make lib-colvars args="-m serial" # build with GNU g++ compiler (settings as with "make serial") diff --git a/lib/lepton/Common.mk b/lib/lepton/Common.mk new file mode 100644 index 0000000000..3eead392a4 --- /dev/null +++ b/lib/lepton/Common.mk @@ -0,0 +1,123 @@ +# -*- makefile -*- +# common settings for Lepton library makefiles + +SRC= \ + src/CompiledExpression.cpp \ + src/CompiledVectorExpression.cpp \ + src/ExpressionProgram.cpp \ + src/ExpressionTreeNode.cpp \ + src/Operation.cpp \ + src/ParsedExpression.cpp \ + src/Parser.cpp +OBJ=$(SRC:src/%.cpp=build/lepton.%.o) + +JITARM= \ + asmjit/arm/a64assembler.cpp \ + asmjit/arm/a64builder.cpp \ + asmjit/arm/a64compiler.cpp \ + asmjit/arm/a64emithelper.cpp \ + asmjit/arm/a64formatter.cpp \ + asmjit/arm/a64func.cpp \ + asmjit/arm/a64instapi.cpp \ + asmjit/arm/a64instdb.cpp \ + asmjit/arm/a64operand.cpp \ + asmjit/arm/a64rapass.cpp \ + asmjit/arm/armformatter.cpp +JITX86 = \ + asmjit/x86/x86assembler.cpp \ + asmjit/x86/x86builder.cpp \ + asmjit/x86/x86compiler.cpp \ + asmjit/x86/x86emithelper.cpp \ + asmjit/x86/x86formatter.cpp \ + asmjit/x86/x86func.cpp \ + asmjit/x86/x86instapi.cpp \ + asmjit/x86/x86instdb.cpp \ + asmjit/x86/x86operand.cpp \ + asmjit/x86/x86rapass.cpp +JITCORE= \ + asmjit/core/archtraits.cpp \ + asmjit/core/assembler.cpp \ + asmjit/core/builder.cpp \ + asmjit/core/codeholder.cpp \ + asmjit/core/codewriter.cpp \ + asmjit/core/compiler.cpp \ + asmjit/core/constpool.cpp \ + asmjit/core/cpuinfo.cpp \ +
asmjit/core/emithelper.cpp \ + asmjit/core/emitter.cpp \ + asmjit/core/emitterutils.cpp \ + asmjit/core/environment.cpp \ + asmjit/core/errorhandler.cpp \ + asmjit/core/formatter.cpp \ + asmjit/core/funcargscontext.cpp \ + asmjit/core/func.cpp \ + asmjit/core/globals.cpp \ + asmjit/core/inst.cpp \ + asmjit/core/jitallocator.cpp \ + asmjit/core/jitruntime.cpp \ + asmjit/core/logger.cpp \ + asmjit/core/operand.cpp \ + asmjit/core/osutils.cpp \ + asmjit/core/ralocal.cpp \ + asmjit/core/rapass.cpp \ + asmjit/core/rastack.cpp \ + asmjit/core/string.cpp \ + asmjit/core/support.cpp \ + asmjit/core/target.cpp \ + asmjit/core/type.cpp \ + asmjit/core/virtmem.cpp \ + asmjit/core/zone.cpp \ + asmjit/core/zonehash.cpp \ + asmjit/core/zonelist.cpp \ + asmjit/core/zonestack.cpp \ + asmjit/core/zonetree.cpp \ + asmjit/core/zonevector.cpp + +JITOBJ=$(JITX86:asmjit/x86/%.cpp=build/x86.%.o) \ + $(JITARM:asmjit/arm/%.cpp=build/arm.%.o) \ + $(JITCORE:asmjit/core/%.cpp=build/core.%.o) + +LEPTON_DIR=. + +include $(LEPTON_DIR)/Settings.mk + +EXTRAMAKE=Makefile.lammps.empty +LIB=liblepton.a + +ifeq ($(ENABLE_JIT),1) +OBJ += $(JITOBJ) +endif + +INC += $(LEPTON_INC) +CXXFLAGS += $(LEPTON_DEF) + +all: $(LIB) Makefile.lammps + +build: + mkdir -p build + +build/lepton.%.o: src/%.cpp build + $(CXX) $(INC) $(CXXFLAGS) -c $< -o $@ + +build/arm.%.o: asmjit/arm/%.cpp build + $(CXX) $(INC) $(CXXFLAGS) -c $< -o $@ + +build/x86.%.o: asmjit/x86/%.cpp build + $(CXX) $(INC) $(CXXFLAGS) -c $< -o $@ + +build/core.%.o: asmjit/core/%.cpp build + $(CXX) $(INC) $(CXXFLAGS) -c $< -o $@ + +Makefile.lammps: + cp $(EXTRAMAKE) $@ + sed -i -e 's,^.*lepton_SYSINC *=.*$$,lepton_SYSINC = $(DEF),' $@ + +.PHONY: all lib clean + +$(LIB) : $(OBJ) + $(AR) $(ARFLAGS) $@ $^ + +clean: + rm -f build/*.o $(LIB) *~ Makefile.lammps + + diff --git a/lib/lepton/Install.py b/lib/lepton/Install.py new file mode 100755 index 0000000000..34e01ad339 --- /dev/null +++ b/lib/lepton/Install.py @@ -0,0 +1,69 @@ +#!/usr/bin/env python + +""" +Install.py tool to build the Lepton library +""" + +from __future__ import print_function +import sys, os, subprocess +from argparse import ArgumentParser + +sys.path.append('..') +from install_helpers import get_cpus, fullpath + +parser = ArgumentParser(prog='Install.py', + description="LAMMPS Lepton library build wrapper script") + +HELP = """ +Syntax from src dir: make lib-lepton args="-m machine" +Syntax from lib dir: python Install.py -m machine + +specify -m + +Examples: + +make lib-lepton args="-m serial" # build Lepton lib with same settings as in the serial Makefile in src +python Install.py -m mpi # build Lepton lib with same settings as in the mpi Makefile in src +""" + +# parse and process arguments + +parser.add_argument("-m", "--machine", + help="suffix of a /Makefile.* file used for compiling this library") + +args = parser.parse_args() + +# print help message and exit, if neither build nor path options are given +if not args.machine: + parser.print_help() + sys.exit(HELP) + +machine = args.machine + +# set lib from working dir + +cwd = fullpath('.') +lib = os.path.basename(cwd) + +if not os.path.exists("Makefile.%s" % machine): + sys.exit("lib/%s/Makefile.%s does not exist" % (lib, machine)) + +# make the library with parallel make +n_cpus = get_cpus() + +print("Building lib%s.a ..."
% lib) +cmd = "make -f Makefile.%s clean; make -f Makefile.%s -j%d" % (machine, machine, n_cpus) +try: + txt = subprocess.check_output(cmd, shell=True, stderr=subprocess.STDOUT) + print(txt.decode('UTF-8')) +except subprocess.CalledProcessError as e: + print("Make failed with:\n %s" % e.output.decode('UTF-8')) + sys.exit(1) + +if os.path.exists("lib%s.a" % lib): + print("Build was successful") +else: + sys.exit("Build of lib/%s/lib%s.a was NOT successful" % (lib, lib)) + +if not os.path.exists("Makefile.lammps"): + print("WARNING: lib/%s/Makefile.lammps was NOT created" % lib) diff --git a/lib/lepton/LICENSE b/lib/lepton/LICENSE new file mode 100644 index 0000000000..6359209705 --- /dev/null +++ b/lib/lepton/LICENSE @@ -0,0 +1,20 @@ +Portions copyright (c) 2009-2019 Stanford University and the Authors. +Authors: Peter Eastman and OpenMM contributors + +Permission is hereby granted, free of charge, to any person obtaining a +copy of this software and associated documentation files (the "Software"), +to deal in the Software without restriction, including without limitation +the rights to use, copy, modify, merge, publish, distribute, sublicense, +and/or sell copies of the Software, and to permit persons to whom the +Software is furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, +DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR +OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE +USE OR OTHER DEALINGS IN THE SOFTWARE. diff --git a/lib/lepton/Makefile.lammps.empty b/lib/lepton/Makefile.lammps.empty new file mode 100644 index 0000000000..57d5846c1b --- /dev/null +++ b/lib/lepton/Makefile.lammps.empty @@ -0,0 +1,5 @@ +# Settings that the LAMMPS build will import when this package library is used + +lepton_SYSINC = +lepton_SYSLIB = +lepton_SYSPATH = diff --git a/lib/lepton/Makefile.mpi b/lib/lepton/Makefile.mpi new file mode 100644 index 0000000000..ee5d9aafae --- /dev/null +++ b/lib/lepton/Makefile.mpi @@ -0,0 +1,8 @@ +# -*- makefile -*- + +CXX=mpicxx +CXXFLAGS=-D_DEFAULT_SOURCE -O2 -Wall -fPIC -std=c++11 +AR=ar +ARFLAGS=rc + +include Common.mk diff --git a/lib/lepton/Makefile.serial b/lib/lepton/Makefile.serial new file mode 100644 index 0000000000..bebe2ce873 --- /dev/null +++ b/lib/lepton/Makefile.serial @@ -0,0 +1,8 @@ +# -*- makefile -*- + +CXX=g++ +CXXFLAGS=-D_DEFAULT_SOURCE -O3 -DNDEBUG -Wall -fPIC -std=c++11 -ffast-math -msse4.2 +AR=ar +ARFLAGS=rc + +include Common.mk diff --git a/lib/lepton/README.md b/lib/lepton/README.md new file mode 100644 index 0000000000..d83fe7ffc1 --- /dev/null +++ b/lib/lepton/README.md @@ -0,0 +1,28 @@ +This directory contains the Lepton library from the OpenMM software, +which allows one to efficiently evaluate mathematical expressions from +strings. This library is used by the LEPTON package, which provides +force styles within LAMMPS that make use of this library.
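As a quick illustration of what the library does, here is a minimal sketch (an editorial example, not part of this patch or of the files above). It assumes the upstream OpenMM Lepton API exposed by the headers in lib/lepton/include (Lepton::Parser, Lepton::ParsedExpression, Lepton::CompiledExpression); the expression itself is arbitrary and only for illustration. An expression string is parsed once, evaluated through the generic interpreter, and then through a compiled expression, which is the code path the optional JIT accelerates.

```c++
// Hypothetical standalone example; compile with -I lib/lepton/include and
// link against the static liblepton.a built as described in this README.
#include "Lepton.h"

#include <cstdio>
#include <map>
#include <string>

int main()
{
    // parse the expression string once into an internal representation
    Lepton::ParsedExpression expr =
        Lepton::Parser::parse("4*eps*((sig/r)^12-(sig/r)^6)");

    // one-off evaluation with a map of variable values
    std::map<std::string, double> vars = {{"eps", 1.0}, {"sig", 1.0}, {"r", 1.2}};
    std::printf("interpreted: %.6f\n", expr.evaluate(vars));

    // compiled form for repeated evaluation (this is what the JIT speeds up);
    // variables are bound by reference and can be updated between evaluations
    Lepton::CompiledExpression compiled = expr.optimize().createCompiledExpression();
    compiled.getVariableReference("eps") = 1.0;
    compiled.getVariableReference("sig") = 1.0;
    compiled.getVariableReference("r") = 1.2;
    std::printf("compiled:    %.6f\n", compiled.evaluate());
    return 0;
}
```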
+ +You can type "make lib-lepton" from the src directory to see help on how +to build this library via make commands, or you can do the same thing +by typing "python Install.py" from within this directory, or you can +do it manually by following the instructions below. + +--------------------- + +Lepton (short for “lightweight expression parser”) is a C++ library for +parsing, evaluating, differentiating, and analyzing mathematical +expressions. It takes expressions in the form of text strings, then +converts them to an internal representation suitable for evaluation or +analysis. Here are some of its major features: + +- Support for a large number of standard mathematical functions and operations. +- Support for user defined custom functions. +- A variety of optimizations for automatically simplifying expressions. +- Computing analytic derivatives. +- Representing parsed expressions in two different forms (tree or program) suitable for + further analysis or processing. +- Support for just-in-time compilation via asmjit library on x86 (autodetected) + This should make evaluation about 2 times faster + +Lepton was originally created for use in the [OpenMM project](https://openmm.org) diff --git a/lib/lepton/Settings.mk b/lib/lepton/Settings.mk new file mode 100644 index 0000000000..d7fd0ac22e --- /dev/null +++ b/lib/lepton/Settings.mk @@ -0,0 +1,17 @@ +# makefile variables and settings related to configuring JIT with Lepton. + +ENABLE_JIT=0 +ifeq ($(shell uname -m),x86_64) +ENABLE_JIT=1 +endif +ifeq ($(shell uname -m),amd64) +ENABLE_JIT=1 +endif + +LEPTON_INC = -I$(LEPTON_DIR)/include +LEPTON_DEF = -DLEPTON_BUILDING_STATIC_LIBRARY=1 + +ifeq ($(ENABLE_JIT),1) +LEPTON_INC += -I$(LEPTON_DIR) +LEPTON_DEF += -DLEPTON_USE_JIT=1 -DASMJIT_BUILD_X86=1 -DASMJIT_STATIC=1 -DASMJIT_BUILD_RELEASE=1 +endif diff --git a/lib/lepton/asmjit/LICENSE.md b/lib/lepton/asmjit/LICENSE.md new file mode 100644 index 0000000000..020a569dbd --- /dev/null +++ b/lib/lepton/asmjit/LICENSE.md @@ -0,0 +1,17 @@ +Copyright (c) 2008-2020 The AsmJit Authors + +This software is provided 'as-is', without any express or implied +warranty. In no event will the authors be held liable for any damages +arising from the use of this software. + +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it +freely, subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not + claim that you wrote the original software. If you use this software + in a product, an acknowledgment in the product documentation would be + appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be + misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. diff --git a/lib/lepton/asmjit/a64.h b/lib/lepton/asmjit/a64.h new file mode 100644 index 0000000000..ea4d304f05 --- /dev/null +++ b/lib/lepton/asmjit/a64.h @@ -0,0 +1,62 @@ +// This file is part of AsmJit project +// +// See asmjit.h or LICENSE.md for license and copyright information +// SPDX-License-Identifier: Zlib + +#ifndef ASMJIT_A64_H_INCLUDED +#define ASMJIT_A64_H_INCLUDED + +//! \addtogroup asmjit_a64 +//! +//! ### Emitters +//! +//! - \ref a64::Assembler - AArch64 assembler (must read, provides examples). +//! - \ref a64::Builder - AArch64 builder. +//! - \ref a64::Compiler - AArch64 compiler. +//! - \ref a64::Emitter - AArch64 emitter (abstract). +//! 
+//! ### Supported Instructions +//! +//! - Emitters: +//! - \ref a64::EmitterExplicitT - Provides all instructions that use explicit +//! operands, provides also utility functions. The member functions provided +//! are part of all ARM/AArch64 emitters. +//! +//! - Instruction representation: +//! - \ref a64::Inst::Id - instruction identifiers. +//! +//! ### Register Operands +//! +//! - \ref arm::Reg - Base class for any AArch32/AArch64 register. +//! - \ref arm::Gp - General purpose register: +//! - \ref arm::GpW - 32-bit register. +//! - \ref arm::GpX - 64-bit register. +//! - \ref arm::Vec - Vector (SIMD) register: +//! - \ref arm::VecB - 8-bit SIMD register (AArch64 only). +//! - \ref arm::VecH - 16-bit SIMD register (AArch64 only). +//! - \ref arm::VecS - 32-bit SIMD register. +//! - \ref arm::VecD - 64-bit SIMD register. +//! - \ref arm::VecV - 128-bit SIMD register. +//! +//! ### Memory Operands +//! +//! - \ref arm::Mem - AArch32/AArch64 memory operand that provides support for all ARM addressing features +//! including base, index, pre/post increment, and ARM-specific shift addressing and index extending. +//! +//! ### Other +//! +//! - \ref arm::Shift - Shift operation and value. +//! - \ref a64::Utils - Utilities that can help during code generation for AArch64. + +#include "./arm.h" +#include "./arm/a64assembler.h" +#include "./arm/a64builder.h" +#include "./arm/a64compiler.h" +#include "./arm/a64emitter.h" +#include "./arm/a64globals.h" +#include "./arm/a64instdb.h" +#include "./arm/a64operand.h" +#include "./arm/a64utils.h" + +#endif // ASMJIT_A64_H_INCLUDED + diff --git a/lib/lepton/asmjit/arm.h b/lib/lepton/asmjit/arm.h new file mode 100644 index 0000000000..57ffa815b8 --- /dev/null +++ b/lib/lepton/asmjit/arm.h @@ -0,0 +1,62 @@ +// This file is part of AsmJit project +// +// See asmjit.h or LICENSE.md for license and copyright information +// SPDX-License-Identifier: Zlib + +#ifndef ASMJIT_ARM_H_INCLUDED +#define ASMJIT_ARM_H_INCLUDED + +//! \addtogroup asmjit_arm +//! +//! ### Namespaces +//! +//! - \ref arm - arm namespace provides common functionality for both AArch32 and AArch64 backends. +//! - \ref a64 - a64 namespace provides support for AArch64 architecture. In addition it includes +//! \ref arm namespace, so you can only use a single namespace when targeting AArch64 architecture. +//! +//! ### Emitters +//! +//! - AArch64 +//! - \ref a64::Assembler - AArch64 assembler (must read, provides examples). +//! - \ref a64::Builder - AArch64 builder. +//! - \ref a64::Compiler - AArch64 compiler. +//! - \ref a64::Emitter - AArch64 emitter (abstract). +//! +//! ### Supported Instructions +//! +//! - AArch64: +//! - Emitters: +//! - \ref a64::EmitterExplicitT - Provides all instructions that use explicit operands, provides also +//! utility functions. The member functions provided are part of all AArch64 emitters. +//! - Instruction representation: +//! - \ref a64::Inst::Id - instruction identifiers. +//! +//! ### Register Operands +//! +//! - \ref arm::Reg - Base class for any AArch32/AArch64 register. +//! - \ref arm::Gp - General purpose register: +//! - \ref arm::GpW - 32-bit register. +//! - \ref arm::GpX - 64-bit register. +//! - \ref arm::Vec - Vector (SIMD) register: +//! - \ref arm::VecB - 8-bit SIMD register (AArch64 only). +//! - \ref arm::VecH - 16-bit SIMD register (AArch64 only). +//! - \ref arm::VecS - 32-bit SIMD register. +//! - \ref arm::VecD - 64-bit SIMD register. +//! - \ref arm::VecV - 128-bit SIMD register. +//! +//! ### Memory Operands +//! +//! 
- \ref arm::Mem - AArch32/AArch64 memory operand that provides support for all ARM addressing features +//! including base, index, pre/post increment, and ARM-specific shift addressing and index extending. +//! +//! ### Other +//! +//! - \ref arm::Shift - Shift operation and value (both AArch32 and AArch64). +//! - \ref arm::DataType - Data type that is part of an instruction in AArch32 mode. +//! - \ref a64::Utils - Utilities that can help during code generation for AArch64. + +#include "./core.h" +#include "./arm/armglobals.h" +#include "./arm/armoperand.h" + +#endif // ASMJIT_ARM_H_INCLUDED diff --git a/lib/lepton/asmjit/arm/a64archtraits_p.h b/lib/lepton/asmjit/arm/a64archtraits_p.h new file mode 100644 index 0000000000..87559c71d5 --- /dev/null +++ b/lib/lepton/asmjit/arm/a64archtraits_p.h @@ -0,0 +1,81 @@ +// This file is part of AsmJit project +// +// See asmjit.h or LICENSE.md for license and copyright information +// SPDX-License-Identifier: Zlib + +#ifndef ASMJIT_ARM_A64ARCHTRAITS_P_H_INCLUDED +#define ASMJIT_ARM_A64ARCHTRAITS_P_H_INCLUDED + +#include "../core/archtraits.h" +#include "../core/misc_p.h" +#include "../core/type.h" +#include "../arm/a64operand.h" + +ASMJIT_BEGIN_SUB_NAMESPACE(a64) + +//! \cond INTERNAL +//! \addtogroup asmjit_a64 +//! \{ + +static const constexpr ArchTraits a64ArchTraits = { + // SP/FP/LR/PC. + Gp::kIdSp, Gp::kIdFp, Gp::kIdLr, 0xFF, + + // Reserved. + { 0, 0, 0 }, + + // HW stack alignment (AArch64 requires stack aligned to 64 bytes). + 16, + + // Min/max stack offset - byte addressing is the worst, VecQ addressing the best. + 4095, 65520, + + // Instruction hints [Gp, Vec, ExtraVirt2, ExtraVirt3]. + {{ + InstHints::kPushPop, + InstHints::kPushPop, + InstHints::kNoHints, + InstHints::kNoHints + }}, + + // RegInfo. + #define V(index) OperandSignature{arm::RegTraits::kSignature} + {{ ASMJIT_LOOKUP_TABLE_32(V, 0) }}, + #undef V + + // RegTypeToTypeId. + #define V(index) TypeId(arm::RegTraits::kTypeId) + {{ ASMJIT_LOOKUP_TABLE_32(V, 0) }}, + #undef V + + // TypeIdToRegType. + #define V(index) (index + uint32_t(TypeId::_kBaseStart) == uint32_t(TypeId::kInt8) ? RegType::kARM_GpW : \ + index + uint32_t(TypeId::_kBaseStart) == uint32_t(TypeId::kUInt8) ? RegType::kARM_GpW : \ + index + uint32_t(TypeId::_kBaseStart) == uint32_t(TypeId::kInt16) ? RegType::kARM_GpW : \ + index + uint32_t(TypeId::_kBaseStart) == uint32_t(TypeId::kUInt16) ? RegType::kARM_GpW : \ + index + uint32_t(TypeId::_kBaseStart) == uint32_t(TypeId::kInt32) ? RegType::kARM_GpW : \ + index + uint32_t(TypeId::_kBaseStart) == uint32_t(TypeId::kUInt32) ? RegType::kARM_GpW : \ + index + uint32_t(TypeId::_kBaseStart) == uint32_t(TypeId::kInt64) ? RegType::kARM_GpX : \ + index + uint32_t(TypeId::_kBaseStart) == uint32_t(TypeId::kUInt64) ? RegType::kARM_GpX : \ + index + uint32_t(TypeId::_kBaseStart) == uint32_t(TypeId::kIntPtr) ? RegType::kARM_GpX : \ + index + uint32_t(TypeId::_kBaseStart) == uint32_t(TypeId::kUIntPtr) ? RegType::kARM_GpX : \ + index + uint32_t(TypeId::_kBaseStart) == uint32_t(TypeId::kFloat32) ? RegType::kARM_VecS : \ + index + uint32_t(TypeId::_kBaseStart) == uint32_t(TypeId::kFloat64) ? RegType::kARM_VecD : RegType::kNone) + {{ ASMJIT_LOOKUP_TABLE_32(V, 0) }}, + #undef V + + // Word names of 8-bit, 16-bit, 32-bit, and 64-bit quantities. + { + ArchTypeNameId::kByte, + ArchTypeNameId::kHWord, + ArchTypeNameId::kWord, + ArchTypeNameId::kXWord + } +}; + +//! \} +//! 
\endcond + +ASMJIT_END_SUB_NAMESPACE + +#endif // ASMJIT_ARM_A64ARCHTRAITS_P_H_INCLUDED diff --git a/lib/lepton/asmjit/arm/a64assembler.cpp b/lib/lepton/asmjit/arm/a64assembler.cpp new file mode 100644 index 0000000000..485f05f491 --- /dev/null +++ b/lib/lepton/asmjit/arm/a64assembler.cpp @@ -0,0 +1,5115 @@ +// This file is part of AsmJit project +// +// See asmjit.h or LICENSE.md for license and copyright information +// SPDX-License-Identifier: Zlib + +#include "../core/api-build_p.h" +#if !defined(ASMJIT_NO_AARCH64) + +#include "../core/codewriter_p.h" +#include "../core/cpuinfo.h" +#include "../core/emitterutils_p.h" +#include "../core/formatter.h" +#include "../core/logger.h" +#include "../core/misc_p.h" +#include "../core/support.h" +#include "../arm/armformatter_p.h" +#include "../arm/a64assembler.h" +#include "../arm/a64emithelper_p.h" +#include "../arm/a64instdb_p.h" +#include "../arm/a64utils.h" + +ASMJIT_BEGIN_SUB_NAMESPACE(a64) + +// a64::Assembler - Cond +// ===================== + +static inline uint32_t condCodeToOpcodeCond(uint32_t cond) noexcept { + return (uint32_t(cond) - 2u) & 0xFu; +} + +// a64::Assembler - Bits +// ===================== + +template +static inline constexpr uint32_t B(const T& index) noexcept { return uint32_t(1u) << uint32_t(index); } + +static constexpr uint32_t kSP = Gp::kIdSp; +static constexpr uint32_t kZR = Gp::kIdZr; +static constexpr uint32_t kWX = InstDB::kWX; + +// a64::Assembler - ShiftOpToLdStOptMap +// ==================================== + +// Table that maps ShiftOp to OPT part in LD/ST (register) opcode. +#define VALUE(index) index == uint32_t(ShiftOp::kUXTW) ? 2u : \ + index == uint32_t(ShiftOp::kLSL) ? 3u : \ + index == uint32_t(ShiftOp::kSXTW) ? 6u : \ + index == uint32_t(ShiftOp::kSXTX) ? 7u : 0xFF +static const uint8_t armShiftOpToLdStOptMap[] = { ASMJIT_LOOKUP_TABLE_16(VALUE, 0) }; +#undef VALUE + +static inline constexpr uint32_t diff(RegType a, RegType b) noexcept { + return uint32_t(a) - uint32_t(b); +} + +// asmjit::a64::Assembler - SizeOp +// =============================== + +//! Struct that contains Size (2 bits), Q flag, and S (scalar) flag. These values +//! are used to encode Q, Size, and Scalar fields in an opcode. +struct SizeOp { + enum : uint8_t { + k128BitShift = 0, + kScalarShift = 1, + kSizeShift = 2, + + kQ = uint8_t(1u << k128BitShift), + kS = uint8_t(1u << kScalarShift), + + k00 = uint8_t(0 << kSizeShift), + k01 = uint8_t(1 << kSizeShift), + k10 = uint8_t(2 << kSizeShift), + k11 = uint8_t(3 << kSizeShift), + + k00Q = k00 | kQ, + k01Q = k01 | kQ, + k10Q = k10 | kQ, + k11Q = k11 | kQ, + + k00S = k00 | kS, + k01S = k01 | kS, + k10S = k10 | kS, + k11S = k11 | kS, + + kInvalid = 0xFFu, + + // Masks used by SizeOpMap. 
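+    // Note (sketch of intent, derived from armElementTypeToSizeOp() below):
+    // kSzQ keeps the size and Q bits, kSzS the size and scalar bits, and
+    // kSzQS all three; a looked-up SizeOp is ANDed with one of these masks so
+    // bits the instruction's vector-operand class ignores are cleared.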
+ kSzQ = (0x3u << kSizeShift) | kQ, + kSzS = (0x3u << kSizeShift) | kS, + kSzQS = (0x3u << kSizeShift) | kQ | kS + }; + + uint8_t value; + + inline bool isValid() const noexcept { return value != kInvalid; } + inline void makeInvalid() noexcept { value = kInvalid; } + + inline uint32_t q() const noexcept { return (value >> k128BitShift) & 0x1u; } + inline uint32_t qs() const noexcept { return ((value >> k128BitShift) | (value >> kScalarShift)) & 0x1u; } + inline uint32_t scalar() const noexcept { return (value >> kScalarShift) & 0x1u; } + inline uint32_t size() const noexcept { return (value >> kSizeShift) & 0x3u; } + + inline void decrementSize() noexcept { + ASMJIT_ASSERT(size() > 0); + value = uint8_t(value - (1u << kSizeShift)); + } +}; + +struct SizeOpTable { + enum TableId : uint8_t { + kTableBin = 0, + kTableAny, + kCount + }; + + // 40 elements for each combination. + SizeOp array[(uint32_t(RegType::kARM_VecV) - uint32_t(RegType::kARM_VecB) + 1) * 8]; +}; + +#define VALUE_BIN(x) { \ + x == (((uint32_t(RegType::kARM_VecD) - uint32_t(RegType::kARM_VecB)) << 3) | (Vec::kElementTypeNone)) ? SizeOp::k00 : \ + x == (((uint32_t(RegType::kARM_VecV) - uint32_t(RegType::kARM_VecB)) << 3) | (Vec::kElementTypeNone)) ? SizeOp::k00Q : \ + x == (((uint32_t(RegType::kARM_VecD) - uint32_t(RegType::kARM_VecB)) << 3) | (Vec::kElementTypeB )) ? SizeOp::k00 : \ + x == (((uint32_t(RegType::kARM_VecV) - uint32_t(RegType::kARM_VecB)) << 3) | (Vec::kElementTypeB )) ? SizeOp::k00Q : SizeOp::kInvalid \ +} + +#define VALUE_ANY(x) { \ + x == (((uint32_t(RegType::kARM_VecB) - uint32_t(RegType::kARM_VecB)) << 3) | (Vec::kElementTypeNone)) ? SizeOp::k00S : \ + x == (((uint32_t(RegType::kARM_VecH) - uint32_t(RegType::kARM_VecB)) << 3) | (Vec::kElementTypeNone)) ? SizeOp::k01S : \ + x == (((uint32_t(RegType::kARM_VecS) - uint32_t(RegType::kARM_VecB)) << 3) | (Vec::kElementTypeNone)) ? SizeOp::k10S : \ + x == (((uint32_t(RegType::kARM_VecD) - uint32_t(RegType::kARM_VecB)) << 3) | (Vec::kElementTypeNone)) ? SizeOp::k11S : \ + x == (((uint32_t(RegType::kARM_VecD) - uint32_t(RegType::kARM_VecB)) << 3) | (Vec::kElementTypeB )) ? SizeOp::k00 : \ + x == (((uint32_t(RegType::kARM_VecV) - uint32_t(RegType::kARM_VecB)) << 3) | (Vec::kElementTypeB )) ? SizeOp::k00Q : \ + x == (((uint32_t(RegType::kARM_VecD) - uint32_t(RegType::kARM_VecB)) << 3) | (Vec::kElementTypeH )) ? SizeOp::k01 : \ + x == (((uint32_t(RegType::kARM_VecV) - uint32_t(RegType::kARM_VecB)) << 3) | (Vec::kElementTypeH )) ? SizeOp::k01Q : \ + x == (((uint32_t(RegType::kARM_VecD) - uint32_t(RegType::kARM_VecB)) << 3) | (Vec::kElementTypeS )) ? SizeOp::k10 : \ + x == (((uint32_t(RegType::kARM_VecV) - uint32_t(RegType::kARM_VecB)) << 3) | (Vec::kElementTypeS )) ? SizeOp::k10Q : \ + x == (((uint32_t(RegType::kARM_VecD) - uint32_t(RegType::kARM_VecB)) << 3) | (Vec::kElementTypeD )) ? SizeOp::k11S : \ + x == (((uint32_t(RegType::kARM_VecV) - uint32_t(RegType::kARM_VecB)) << 3) | (Vec::kElementTypeD )) ? 
SizeOp::k11Q : SizeOp::kInvalid \ +} + +static const SizeOpTable sizeOpTable[SizeOpTable::kCount] = { + {{ ASMJIT_LOOKUP_TABLE_40(VALUE_BIN, 0) }}, + {{ ASMJIT_LOOKUP_TABLE_40(VALUE_ANY, 0) }} +}; + +#undef VALUE_ANY +#undef VALUE_BIN + +struct SizeOpMap { + uint8_t tableId; + uint8_t sizeOpMask; + uint16_t acceptMask; +}; + +static const constexpr SizeOpMap sizeOpMap[InstDB::kVO_Count] = { + { // kVO_V_B: + SizeOpTable::kTableBin, SizeOp::kQ , uint16_t(B(SizeOp::k00) | B(SizeOp::k00Q)) + }, + + { // kVO_V_BH: + SizeOpTable::kTableAny, SizeOp::kSzQS, uint16_t(B(SizeOp::k00) | B(SizeOp::k00Q) | B(SizeOp::k01) | B(SizeOp::k01Q)) + }, + + { // kVO_V_BH_4S: + SizeOpTable::kTableAny, SizeOp::kSzQS, uint16_t(B(SizeOp::k00) | B(SizeOp::k00Q) | B(SizeOp::k01) | B(SizeOp::k01Q) | B(SizeOp::k10Q)) + }, + + { // kVO_V_BHS: + SizeOpTable::kTableAny, SizeOp::kSzQS, uint16_t(B(SizeOp::k00) | B(SizeOp::k00Q) | B(SizeOp::k01) | B(SizeOp::k01Q) | B(SizeOp::k10) | B(SizeOp::k10Q)) + }, + + { // kVO_V_BHS_D2: + SizeOpTable::kTableAny, SizeOp::kSzQS, uint16_t(B(SizeOp::k00) | B(SizeOp::k00Q) | B(SizeOp::k01) | B(SizeOp::k01Q) | B(SizeOp::k10) | B(SizeOp::k10Q) | B(SizeOp::k11Q)) + }, + + { // kVO_V_HS: + SizeOpTable::kTableAny, SizeOp::kSzQS, uint16_t(B(SizeOp::k01) | B(SizeOp::k01Q) | B(SizeOp::k10) | B(SizeOp::k10Q)) + }, + + { // kVO_V_S: + SizeOpTable::kTableAny, SizeOp::kQ , uint16_t(B(SizeOp::k10) | B(SizeOp::k10Q)) + }, + + { // kVO_V_B8H4: + SizeOpTable::kTableAny, SizeOp::kSzQS, uint16_t(B(SizeOp::k00) | B(SizeOp::k01)) + }, + + { // kVO_V_B8H4S2: + SizeOpTable::kTableAny, SizeOp::kSzQS, uint16_t(B(SizeOp::k00) | B(SizeOp::k01) | B(SizeOp::k10)) + }, + + { // kVO_V_B8D1: + SizeOpTable::kTableAny, SizeOp::kSzQ , uint16_t(B(SizeOp::k00) | B(SizeOp::k11S)) + }, + + { // kVO_V_H4S2: + SizeOpTable::kTableAny, SizeOp::kSzQS, uint16_t(B(SizeOp::k01) | B(SizeOp::k10)) + }, + + { // kVO_V_B16: + SizeOpTable::kTableBin, SizeOp::kQ , uint16_t(B(SizeOp::k00Q)) + }, + + { // kVO_V_B16H8: + SizeOpTable::kTableAny, SizeOp::kSzQS, uint16_t(B(SizeOp::k00Q) | B(SizeOp::k01Q)) + }, + + { // kVO_V_B16H8S4: + SizeOpTable::kTableAny, SizeOp::kSzQS, uint16_t(B(SizeOp::k00Q) | B(SizeOp::k01Q) | B(SizeOp::k10Q)) + }, + + { // kVO_V_B16D2: + SizeOpTable::kTableAny, SizeOp::kSzQS, uint16_t(B(SizeOp::k00Q) | B(SizeOp::k11Q)) + }, + + { // kVO_V_H8S4: + SizeOpTable::kTableAny, SizeOp::kSzQS, uint16_t(B(SizeOp::k01Q) | B(SizeOp::k10Q)) + }, + + { // kVO_V_S4: + SizeOpTable::kTableAny, 0 , uint16_t(B(SizeOp::k10Q)) + }, + + { // kVO_V_D2: + SizeOpTable::kTableAny, 0 , uint16_t(B(SizeOp::k11Q)) + }, + + { // kVO_SV_BHS: + SizeOpTable::kTableAny, SizeOp::kSzQS, uint16_t(B(SizeOp::k00) | B(SizeOp::k00Q) | B(SizeOp::k00S) | B(SizeOp::k01) | B(SizeOp::k01Q) | B(SizeOp::k01S) | B(SizeOp::k10) | B(SizeOp::k10Q) | B(SizeOp::k10S)) + }, + + { // kVO_SV_B8H4S2: + SizeOpTable::kTableAny, SizeOp::kSzQS, uint16_t(B(SizeOp::k00) | B(SizeOp::k00S) | B(SizeOp::k01) | B(SizeOp::k01S) | B(SizeOp::k10) | B(SizeOp::k10S)) + }, + + { // kVO_SV_HS: + SizeOpTable::kTableAny, SizeOp::kSzQS, uint16_t(B(SizeOp::k01) | B(SizeOp::k01Q) | B(SizeOp::k01S) | B(SizeOp::k10) | B(SizeOp::k10Q) | B(SizeOp::k10S)) + }, + + { // kVO_V_Any: + SizeOpTable::kTableAny, SizeOp::kSzQS, uint16_t(B(SizeOp::k00) | B(SizeOp::k00Q) | B(SizeOp::k01) | B(SizeOp::k01Q) | B(SizeOp::k10) | B(SizeOp::k10Q) | B(SizeOp::k11S) | B(SizeOp::k11Q)) + }, + + { // kVO_SV_Any: + SizeOpTable::kTableAny, SizeOp::kSzQS, uint16_t(B(SizeOp::k00) | B(SizeOp::k00Q) | B(SizeOp::k00S) | + 
B(SizeOp::k01) | B(SizeOp::k01Q) | B(SizeOp::k01S) | + B(SizeOp::k10) | B(SizeOp::k10Q) | B(SizeOp::k10S) | + B(SizeOp::k11) | B(SizeOp::k11Q) | B(SizeOp::k11S)) + } +}; + +static const Operand_& significantSimdOp(const Operand_& o0, const Operand_& o1, uint32_t instFlags) noexcept { + return !(instFlags & InstDB::kInstFlagLong) ? o0 : o1; +} + +static inline SizeOp armElementTypeToSizeOp(uint32_t vecOpType, RegType regType, uint32_t elementType) noexcept { + // Instruction data or Assembler is wrong if this triggers an assertion failure. + ASMJIT_ASSERT(vecOpType < InstDB::kVO_Count); + // ElementType uses 3 bits in the operand signature, it should never overflow. + ASMJIT_ASSERT(elementType <= 0x7u); + + const SizeOpMap& map = sizeOpMap[vecOpType]; + const SizeOpTable& table = sizeOpTable[map.tableId]; + + size_t index = (Support::min(diff(regType, RegType::kARM_VecB), diff(RegType::kARM_VecV, RegType::kARM_VecB) + 1) << 3) | elementType; + SizeOp op = table.array[index]; + SizeOp modifiedOp { uint8_t(op.value & map.sizeOpMask) }; + + if (!Support::bitTest(map.acceptMask, op.value)) + modifiedOp.makeInvalid(); + + return modifiedOp; +} + +// a64::Assembler - Immediate Encoding Utilities (Integral) +// ======================================================== + +using Utils::LogicalImm; + +struct HalfWordImm { + uint32_t hw; + uint32_t inv; + uint32_t imm; +}; + +struct LMHImm { + uint32_t lm; + uint32_t h; + uint32_t maxRmId; +}; + +static inline uint32_t countZeroHalfWords64(uint64_t imm) noexcept { + return uint32_t((imm & 0x000000000000FFFFu) == 0) + + uint32_t((imm & 0x00000000FFFF0000u) == 0) + + uint32_t((imm & 0x0000FFFF00000000u) == 0) + + uint32_t((imm & 0xFFFF000000000000u) == 0) ; +} + +static uint32_t encodeMovSequence32(uint32_t out[2], uint32_t imm, uint32_t rd, uint32_t x) noexcept { + ASMJIT_ASSERT(rd <= 31); + + uint32_t kMovZ = 0b01010010100000000000000000000000 | (x << 31); + uint32_t kMovN = 0b00010010100000000000000000000000; + uint32_t kMovK = 0b01110010100000000000000000000000; + + if ((imm & 0xFFFF0000u) == 0x00000000u) { + out[0] = kMovZ | (0 << 21) | ((imm & 0xFFFFu) << 5) | rd; + return 1; + } + + if ((imm & 0xFFFF0000u) == 0xFFFF0000u) { + out[0] = kMovN | (0 << 21) | ((~imm & 0xFFFFu) << 5) | rd; + return 1; + } + + if ((imm & 0x0000FFFFu) == 0x00000000u) { + out[0] = kMovZ | (1 << 21) | ((imm >> 16) << 5) | rd; + return 1; + } + + if ((imm & 0x0000FFFFu) == 0x0000FFFFu) { + out[0] = kMovN | (1 << 21) | ((~imm >> 16) << 5) | rd; + return 1; + } + + out[0] = kMovZ | (0 << 21) | ((imm & 0xFFFFu) << 5) | rd; + out[1] = kMovK | (1 << 21) | ((imm >> 16) << 5) | rd; + return 2; +} + +static uint32_t encodeMovSequence64(uint32_t out[4], uint64_t imm, uint32_t rd, uint32_t x) noexcept { + ASMJIT_ASSERT(rd <= 31); + + uint32_t kMovZ = 0b11010010100000000000000000000000; + uint32_t kMovN = 0b10010010100000000000000000000000; + uint32_t kMovK = 0b11110010100000000000000000000000; + + if (imm <= 0xFFFFFFFFu) + return encodeMovSequence32(out, uint32_t(imm), rd, x); + + uint32_t zhw = countZeroHalfWords64( imm); + uint32_t ohw = countZeroHalfWords64(~imm); + + if (zhw >= ohw) { + uint32_t op = kMovZ; + uint32_t count = 0; + + for (uint32_t hwIndex = 0; hwIndex < 4; hwIndex++, imm >>= 16) { + uint32_t hwImm = uint32_t(imm & 0xFFFFu); + if (hwImm == 0) + continue; + + out[count++] = op | (hwIndex << 21) | (hwImm << 5) | rd; + op = kMovK; + } + + // This should not happen - zero should be handled by encodeMovSequence32(). 
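    // (count can only remain zero when every half-word of 'imm' is zero, i.e.
    // imm == 0, which was already handled by encodeMovSequence32() above; for
    // example, 0x0001000200030004 emits one MOVZ for the low half-word
    // followed by three MOVKs for the remaining non-zero half-words).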
+ ASMJIT_ASSERT(count > 0); + + return count; + } + else { + uint32_t op = kMovN; + uint32_t count = 0; + uint32_t negMask = 0xFFFFu; + + for (uint32_t hwIndex = 0; hwIndex < 4; hwIndex++, imm >>= 16) { + uint32_t hwImm = uint32_t(imm & 0xFFFFu); + if (hwImm == 0xFFFFu) + continue; + + out[count++] = op | (hwIndex << 21) | ((hwImm ^ negMask) << 5) | rd; + op = kMovK; + negMask = 0; + } + + if (count == 0) { + out[count++] = kMovN | ((0xFFFF ^ negMask) << 5) | rd; + } + + return count; + } +} + +static inline bool encodeLMH(uint32_t sizeField, uint32_t elementIndex, LMHImm* out) noexcept { + if (sizeField != 1 && sizeField != 2) + return false; + + uint32_t hShift = 3u - sizeField; + uint32_t lmShift = sizeField - 1u; + uint32_t maxElementIndex = 15u >> sizeField; + + out->h = elementIndex >> hShift; + out->lm = (elementIndex << lmShift) & 0x3u; + out->maxRmId = (8u << sizeField) - 1; + + return elementIndex <= maxElementIndex; +} + +// [.......A|B.......|.......C|D.......|.......E|F.......|.......G|H.......] +static inline uint32_t encodeImm64ByteMaskToImm8(uint64_t imm) noexcept { + return uint32_t(((imm >> (7 - 0)) & 0b00000011) | // [.......G|H.......] + ((imm >> (23 - 2)) & 0b00001100) | // [.......E|F.......] + ((imm >> (39 - 4)) & 0b00110000) | // [.......C|D.......] + ((imm >> (55 - 6)) & 0b11000000)); // [.......A|B.......] +} + +// a64::Assembler - Opcode +// ======================= + +//! Helper class to store and manipulate ARM opcode. +struct Opcode { + uint32_t v; + + enum Bits : uint32_t { + kN = (1u << 22), + kQ = (1u << 30), + kX = (1u << 31) + }; + + // -------------------------------------------------------------------------- + // [Opcode Builder] + // -------------------------------------------------------------------------- + + inline uint32_t get() const noexcept { return v; } + inline void reset(uint32_t value) noexcept { v = value; } + + inline bool hasQ() const noexcept { return (v & kQ) != 0; } + inline bool hasX() const noexcept { return (v & kX) != 0; } + + template + inline Opcode& addImm(T value, uint32_t bitIndex) noexcept { return operator|=(uint32_t(value) << bitIndex); } + + template + inline Opcode& xorImm(T value, uint32_t bitIndex) noexcept { return operator^=(uint32_t(value) << bitIndex); } + + template + inline Opcode& addIf(T value, const Condition& condition) noexcept { return operator|=(condition ? uint32_t(value) : uint32_t(0)); } + + inline Opcode& addLogicalImm(const LogicalImm& logicalImm) noexcept { + addImm(logicalImm.n, 22); + addImm(logicalImm.r, 16); + addImm(logicalImm.s, 10); + return *this; + } + + inline Opcode& addReg(uint32_t id, uint32_t bitIndex) noexcept { return operator|=((id & 31u) << bitIndex); } + inline Opcode& addReg(const Operand_& op, uint32_t bitIndex) noexcept { return addReg(op.id(), bitIndex); } + + inline Opcode& operator=(uint32_t x) noexcept { v = x; return *this; } + inline Opcode& operator&=(uint32_t x) noexcept { v &= x; return *this; } + inline Opcode& operator|=(uint32_t x) noexcept { v |= x; return *this; } + inline Opcode& operator^=(uint32_t x) noexcept { v ^= x; return *this; } + + inline uint32_t operator&(uint32_t x) const noexcept { return v & x; } + inline uint32_t operator|(uint32_t x) const noexcept { return v | x; } + inline uint32_t operator^(uint32_t x) const noexcept { return v ^ x; } +}; + +// a64::Assembler - Signature Utilities +// ==================================== + +// TODO: [ARM] Deprecate matchSignature. 
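+// matchSignature() relaxes the destination/source signature check for
+// instructions flagged as long or narrow, where destination and source
+// element widths legitimately differ; otherwise the signatures must match.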
+static inline bool matchSignature(const Operand_& o0, const Operand_& o1, uint32_t instFlags) noexcept { + if (!(instFlags & (InstDB::kInstFlagLong | InstDB::kInstFlagNarrow))) + return o0.signature() == o1.signature(); + + // TODO: [ARM] Something smart to validate this. + return true; +} + +static inline bool matchSignature(const Operand_& o0, const Operand_& o1, const Operand_& o2, uint32_t instFlags) noexcept { + return matchSignature(o0, o1, instFlags) && o1.signature() == o2.signature(); +} + +static inline bool matchSignature(const Operand_& o0, const Operand_& o1, const Operand_& o2, const Operand_& o3, uint32_t instFlags) noexcept { + return matchSignature(o0, o1, instFlags) && o1.signature() == o2.signature() && o2.signature() == o3.signature();; +} + +// Memory must be either: +// 1. Absolute address, which will be converted to relative. +// 2. Relative displacement (Label). +// 3. Base register + either offset or index. +static inline bool armCheckMemBaseIndexRel(const Mem& mem) noexcept { + // Allowed base types (Nothing, Label, and GpX). + constexpr uint32_t kBaseMask = B(0) | + B(RegType::kLabelTag) | + B(RegType::kARM_GpX); + + // Allowed index types (Nothing, GpW, and GpX). + constexpr uint32_t kIndexMask = B(0) | + B(RegType::kARM_GpW) | + B(RegType::kARM_GpX) ; + + RegType baseType = mem.baseType(); + RegType indexType = mem.indexType(); + + if (!Support::bitTest(kBaseMask, baseType)) + return false; + + if (baseType > RegType::kLabelTag) { + // Index allows either GpW or GpX. + if (!Support::bitTest(kIndexMask, indexType)) + return false; + + if (indexType == RegType::kNone) + return true; + else + return !mem.hasOffset(); + } + else { + // No index register allowed if this is a PC relative address (literal). + return indexType == RegType::kNone; + } +} + +struct EncodeFpOpcodeBits { + uint32_t sizeMask; + uint32_t mask[3]; +}; + +static inline bool pickFpOpcode(const Vec& reg, uint32_t sOp, uint32_t sHf, uint32_t vOp, uint32_t vHf, Opcode* opcode, uint32_t* szOut) noexcept { + static constexpr uint32_t kQBitIndex = 30; + + static const EncodeFpOpcodeBits szBits[InstDB::kHF_Count] = { + { B(2) | B(1) , { 0u , 0u, B(22) } }, + { B(2) | B(1) | B(0), { 0u , 0u, 0u } }, + { B(2) | B(1) | B(0), { B(23) | B(22) , 0u, B(22) } }, + { B(2) | B(1) | B(0), { B(22) | B(20) | B(19) , 0u, B(22) } }, + { B(2) | B(1) | B(0), { B(22) | B(21) | B(15) | B(14), 0u, B(22) } }, + { B(2) | B(1) | B(0), { B(23) , 0u, B(22) } } + }; + + if (!reg.hasElementType()) { + // Scalar operation [HSD]. + uint32_t sz = diff(reg.type(), RegType::kARM_VecH); + if (sz > 2u || !Support::bitTest(szBits[sHf].sizeMask, sz)) + return false; + + opcode->reset(szBits[sHf].mask[sz] ^ sOp); + *szOut = sz; + return sOp != 0; + } + else { + // Vector operation [HSD]. + uint32_t q = diff(reg.type(), RegType::kARM_VecD); + uint32_t sz = reg.elementType() - Vec::kElementTypeH; + + if (q > 1u || sz > 2u || !Support::bitTest(szBits[vHf].sizeMask, sz)) + return false; + + opcode->reset(szBits[vHf].mask[sz] ^ (vOp | (q << kQBitIndex))); + *szOut = sz; + return vOp != 0; + } +} + +static inline bool pickFpOpcode(const Vec& reg, uint32_t sOp, uint32_t sHf, uint32_t vOp, uint32_t vHf, Opcode* opcode) noexcept { + uint32_t sz; + return pickFpOpcode(reg, sOp, sHf, vOp, vHf, opcode, &sz); +} + +// a64::Assembler - Operand Checks +// =============================== + +// Checks whether all operands have the same signature. 
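+// An operand signature packs the operand type, register type/group, and size,
+// so, for example, a GpW (w1) and a GpX (x1) operand never compare equal here.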
+static inline bool checkSignature(const Operand_& o0, const Operand_& o1) noexcept { + return o0.signature() == o1.signature(); +} + +static inline bool checkSignature(const Operand_& o0, const Operand_& o1, const Operand_& o2) noexcept { + return o0.signature() == o1.signature() && + o1.signature() == o2.signature(); +} + +static inline bool checkSignature(const Operand_& o0, const Operand_& o1, const Operand_& o2, const Operand_& o3) noexcept { + return o0.signature() == o1.signature() && + o1.signature() == o2.signature() && + o2.signature() == o3.signature(); +} + +// Checks whether the register is GP register of the allowed types. +// +// Allowed is a 2-bit mask, where the first bits allows GpW and the second bit +// allows GpX. These bits are usually stored within the instruction, but could +// be also hardcoded in the assembler for instructions where GP types are not +// selectable. +static inline bool checkGpType(const Operand_& op, uint32_t allowed) noexcept { + RegType type = op.as().type(); + return Support::bitTest(allowed << uint32_t(RegType::kARM_GpW), type); +} + +static inline bool checkGpType(const Operand_& op, uint32_t allowed, uint32_t* x) noexcept { + // NOTE: We set 'x' to one only when GpW is allowed, otherwise the X is part + // of the opcode and we cannot set it. This is why this works without requiring + // additional logic. + RegType type = op.as().type(); + *x = diff(type, RegType::kARM_GpW) & allowed; + return Support::bitTest(allowed << uint32_t(RegType::kARM_GpW), type); +} + +static inline bool checkGpType(const Operand_& o0, const Operand_& o1, uint32_t allowed, uint32_t* x) noexcept { + return checkGpType(o0, allowed, x) && checkSignature(o0, o1); +} + +static inline bool checkGpType(const Operand_& o0, const Operand_& o1, const Operand_& o2, uint32_t allowed, uint32_t* x) noexcept { + return checkGpType(o0, allowed, x) && checkSignature(o0, o1, o2); +} + +static inline bool checkGpId(const Operand_& op, uint32_t hiId = kZR) noexcept { + uint32_t id = op.as().id(); + return id < 31u || id == hiId; +} + +static inline bool checkGpId(const Operand_& o0, const Operand_& o1, uint32_t hiId = kZR) noexcept { + uint32_t id0 = o0.as().id(); + uint32_t id1 = o1.as().id(); + + return (id0 < 31u || id0 == hiId) && (id1 < 31u || id1 == hiId); +} + +static inline bool checkGpId(const Operand_& o0, const Operand_& o1, const Operand_& o2, uint32_t hiId = kZR) noexcept { + uint32_t id0 = o0.as().id(); + uint32_t id1 = o1.as().id(); + uint32_t id2 = o2.as().id(); + + return (id0 < 31u || id0 == hiId) && (id1 < 31u || id1 == hiId) && (id2 < 31u || id2 == hiId); +} + +static inline bool checkVecId(const Operand_& op) noexcept { + uint32_t id = op.as().id(); + return id <= 31u; +} + +static inline bool checkVecId(const Operand_& o0, const Operand_& o1) noexcept { + uint32_t id0 = o0.as().id(); + uint32_t id1 = o1.as().id(); + + return (id0 | id1) <= 31u; +} + +/* Unused at the moment. 
+static inline bool checkVecId(const Operand_& o0, const Operand_& o1, const Operand_& o2) noexcept { + uint32_t id0 = o0.as().id(); + uint32_t id1 = o1.as().id(); + uint32_t id2 = o2.as().id(); + + return (id0 | id1 | id2) <= 31u; +} + +static inline bool checkVecId(const Operand_& o0, const Operand_& o1, const Operand_& o2, const Operand_& o3) noexcept { + uint32_t id0 = o0.as().id(); + uint32_t id1 = o1.as().id(); + uint32_t id2 = o2.as().id(); + uint32_t id3 = o3.as().id(); + + return (id0 | id1 | id2 | id3) <= 31u; +} +*/ + +static inline bool checkMemBase(const Mem& mem) noexcept { + return mem.baseType() == RegType::kARM_GpX && mem.baseId() <= 31; +} + +static inline bool checkEven(const Operand_& o0, const Operand_& o1) noexcept { + return ((o0.id() | o1.id()) & 1) == 0; +} + +static inline bool checkConsecutive(const Operand_& o0, const Operand_& o1) noexcept { + return ((o0.id() + 1u) & 0x1Fu) == o1.id(); +} + +static inline bool checkConsecutive(const Operand_& o0, const Operand_& o1, const Operand_& o2) noexcept { + return ((o0.id() + 1u) & 0x1Fu) == o1.id() && + ((o0.id() + 2u) & 0x1Fu) == o2.id(); +} + +static inline bool checkConsecutive(const Operand_& o0, const Operand_& o1, const Operand_& o2, const Operand_& o3) noexcept { + return ((o0.id() + 1u) & 0x1Fu) == o1.id() && + ((o0.id() + 2u) & 0x1Fu) == o2.id() && + ((o0.id() + 3u) & 0x1Fu) == o3.id(); +} + +// a64::Assembler - CheckReg +// ========================= + +#define V(index) (index == uint32_t(RegType::kARM_GpW) ? Gp::kIdZr : \ + index == uint32_t(RegType::kARM_GpX) ? Gp::kIdZr : \ + index == uint32_t(RegType::kARM_VecB) ? 31u : \ + index == uint32_t(RegType::kARM_VecH) ? 31u : \ + index == uint32_t(RegType::kARM_VecS) ? 31u : \ + index == uint32_t(RegType::kARM_VecD) ? 31u : \ + index == uint32_t(RegType::kARM_VecV) ? 
31u : 0) +static const Support::Array commonHiRegIdOfType = {{ + ASMJIT_LOOKUP_TABLE_32(V, 0) +}}; +#undef V + +static inline bool checkValidRegs(const Operand_& o0) noexcept { + return ((o0.id() < 31) | (o0.id() == commonHiRegIdOfType[o0.as().type()])); +} + +static inline bool checkValidRegs(const Operand_& o0, const Operand_& o1) noexcept { + return ((o0.id() < 31) | (o0.id() == commonHiRegIdOfType[o0.as().type()])) & + ((o1.id() < 31) | (o1.id() == commonHiRegIdOfType[o1.as().type()])) ; +} + +static inline bool checkValidRegs(const Operand_& o0, const Operand_& o1, const Operand_& o2) noexcept { + return ((o0.id() < 31) | (o0.id() == commonHiRegIdOfType[o0.as().type()])) & + ((o1.id() < 31) | (o1.id() == commonHiRegIdOfType[o1.as().type()])) & + ((o2.id() < 31) | (o2.id() == commonHiRegIdOfType[o2.as().type()])) ; +} + +static inline bool checkValidRegs(const Operand_& o0, const Operand_& o1, const Operand_& o2, const Operand_& o3) noexcept { + return ((o0.id() < 31) | (o0.id() == commonHiRegIdOfType[o0.as().type()])) & + ((o1.id() < 31) | (o1.id() == commonHiRegIdOfType[o1.as().type()])) & + ((o2.id() < 31) | (o2.id() == commonHiRegIdOfType[o2.as().type()])) & + ((o3.id() < 31) | (o3.id() == commonHiRegIdOfType[o3.as().type()])) ; +} + +// a64::Assembler - Construction & Destruction +// =========================================== + +Assembler::Assembler(CodeHolder* code) noexcept : BaseAssembler() { + _archMask = uint64_t(1) << uint32_t(Arch::kAArch64); + assignEmitterFuncs(this); + + if (code) + code->attach(this); +} + +Assembler::~Assembler() noexcept {} + +// a64::Assembler - Emit +// ===================== + +#define ENC_OPS1(OP0) \ + (uint32_t(OperandType::k##OP0)) + +#define ENC_OPS2(OP0, OP1) \ + (uint32_t(OperandType::k##OP0) + \ + (uint32_t(OperandType::k##OP1) << 3)) + +#define ENC_OPS3(OP0, OP1, OP2) \ + (uint32_t(OperandType::k##OP0) + \ + (uint32_t(OperandType::k##OP1) << 3) + \ + (uint32_t(OperandType::k##OP2) << 6)) + +#define ENC_OPS4(OP0, OP1, OP2, OP3) \ + (uint32_t(OperandType::k##OP0) + \ + (uint32_t(OperandType::k##OP1) << 3) + \ + (uint32_t(OperandType::k##OP2) << 6) + \ + (uint32_t(OperandType::k##OP3) << 9)) + +Error Assembler::_emit(InstId instId, const Operand_& o0, const Operand_& o1, const Operand_& o2, const Operand_* opExt) { + // Logging/Validation/Error. + constexpr InstOptions kRequiresSpecialHandling = InstOptions::kReserved; + + Error err; + CodeWriter writer(this); + + // Combine all instruction options and also check whether the instruction + // is valid. All options that require special handling (including invalid + // instruction) are handled by the next branch. + InstOptions options = InstOptions(instId - 1 >= Inst::_kIdCount - 1) | InstOptions((size_t)(_bufferEnd - writer.cursor()) < 4) | instOptions() | forcedInstOptions(); + + CondCode instCC = BaseInst::extractARMCondCode(instId); + instId = instId & uint32_t(InstIdParts::kRealId); + + if (instId >= Inst::_kIdCount) + instId = 0; + + const InstDB::InstInfo* instInfo = &InstDB::_instInfoTable[instId]; + uint32_t encodingIndex = instInfo->_encodingDataIndex; + + Opcode opcode; + uint32_t isign4; + uint32_t instFlags; + + const Operand_& o3 = opExt[EmitterUtils::kOp3]; + const Operand_* rmRel = nullptr; + + uint32_t multipleOpData[4]; + uint32_t multipleOpCount; + + // These are only used when instruction uses a relative displacement. + OffsetFormat offsetFormat; // Offset format. + uint64_t offsetValue; // Offset value (if known). 
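+  // Note: 'isign4' (computed below) packs the types of the first four
+  // operands, 3 bits each, so every encoder case can select an operand form
+  // with a single comparison against ENC_OPS2(Reg, Imm), ENC_OPS3(...), etc.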
+ + if (ASMJIT_UNLIKELY(Support::test(options, kRequiresSpecialHandling))) { + if (ASMJIT_UNLIKELY(!_code)) + return reportError(DebugUtils::errored(kErrorNotInitialized)); + + // Unknown instruction. + if (ASMJIT_UNLIKELY(instId == 0)) + goto InvalidInstruction; + + // Condition code can only be used with 'B' instruction. + if (ASMJIT_UNLIKELY(instCC != CondCode::kAL && instId != Inst::kIdB)) + goto InvalidInstruction; + + // Grow request, happens rarely. + err = writer.ensureSpace(this, 4); + if (ASMJIT_UNLIKELY(err)) + goto Failed; + +#ifndef ASMJIT_NO_VALIDATION + // Strict validation. + if (hasDiagnosticOption(DiagnosticOptions::kValidateAssembler)) { + Operand_ opArray[Globals::kMaxOpCount]; + EmitterUtils::opArrayFromEmitArgs(opArray, o0, o1, o2, opExt); + + err = _funcs.validate(arch(), BaseInst(instId, options, _extraReg), opArray, Globals::kMaxOpCount, ValidationFlags::kNone); + if (ASMJIT_UNLIKELY(err)) + goto Failed; + } +#endif + } + + // Signature of the first 4 operands. + isign4 = (uint32_t(o0.opType()) ) + + (uint32_t(o1.opType()) << 3) + + (uint32_t(o2.opType()) << 6) + + (uint32_t(o3.opType()) << 9); + instFlags = instInfo->flags(); + + switch (instInfo->_encoding) { + // ------------------------------------------------------------------------ + // [Base - Universal] + // ------------------------------------------------------------------------ + + case InstDB::kEncodingBaseOp: { + const InstDB::EncodingData::BaseOp& opData = InstDB::EncodingData::baseOp[encodingIndex]; + + if (isign4 == 0) { + opcode.reset(opData.opcode); + goto EmitOp; + } + + break; + } + + case InstDB::kEncodingBaseOpImm: { + const InstDB::EncodingData::BaseOpImm& opData = InstDB::EncodingData::baseOpImm[encodingIndex]; + + if (isign4 == ENC_OPS1(Imm)) { + uint64_t imm = o0.as().valueAs(); + uint32_t immMax = 1u << opData.immBits; + + if (imm >= immMax) + goto InvalidImmediate; + + opcode.reset(opData.opcode); + opcode.addImm(imm, opData.immOffset); + goto EmitOp; + } + + break; + } + + case InstDB::kEncodingBaseR: { + const InstDB::EncodingData::BaseR& opData = InstDB::EncodingData::baseR[encodingIndex]; + + if (isign4 == ENC_OPS1(Reg)) { + if (!checkGpType(o0, opData.rType)) + goto InvalidInstruction; + + if (!checkGpId(o0, opData.rHiId)) + goto InvalidPhysId; + + opcode.reset(opData.opcode); + opcode.addReg(o0, opData.rShift); + goto EmitOp; + } + + break; + } + + case InstDB::kEncodingBaseRR: { + const InstDB::EncodingData::BaseRR& opData = InstDB::EncodingData::baseRR[encodingIndex]; + + if (isign4 == ENC_OPS2(Reg, Reg)) { + uint32_t x; + if (!checkGpType(o0, opData.aType, &x)) + goto InvalidInstruction; + + if (!checkGpType(o1, opData.bType)) + goto InvalidInstruction; + + if (opData.uniform && !checkSignature(o0, o1)) + goto InvalidInstruction; + + if (!checkGpId(o0, opData.aHiId)) + goto InvalidPhysId; + + if (!checkGpId(o1, opData.bHiId)) + goto InvalidPhysId; + + opcode.reset(opData.opcode); + opcode.addImm(x, 31); + opcode.addReg(o1, opData.bShift); + opcode.addReg(o0, opData.aShift); + goto EmitOp; + } + + break; + } + + case InstDB::kEncodingBaseRRR: { + const InstDB::EncodingData::BaseRRR& opData = InstDB::EncodingData::baseRRR[encodingIndex]; + + if (isign4 == ENC_OPS3(Reg, Reg, Reg)) { + uint32_t x; + if (!checkGpType(o0, opData.aType, &x)) + goto InvalidInstruction; + + if (!checkGpType(o1, opData.bType)) + goto InvalidInstruction; + + if (!checkGpType(o2, opData.cType)) + goto InvalidInstruction; + + if (opData.uniform && !checkSignature(o0, o1, o2)) + goto InvalidInstruction; + + if 
(!checkGpId(o0, opData.aHiId)) + goto InvalidPhysId; + + if (!checkGpId(o1, opData.bHiId)) + goto InvalidPhysId; + + if (!checkGpId(o2, opData.cHiId)) + goto InvalidPhysId; + + opcode.reset(opData.opcode()); + opcode.addImm(x, 31); + opcode.addReg(o2, 16); + opcode.addReg(o1, 5); + opcode.addReg(o0, 0); + goto EmitOp; + } + + break; + } + + case InstDB::kEncodingBaseRRRR: { + const InstDB::EncodingData::BaseRRRR& opData = InstDB::EncodingData::baseRRRR[encodingIndex]; + + if (isign4 == ENC_OPS4(Reg, Reg, Reg, Reg)) { + uint32_t x; + if (!checkGpType(o0, opData.aType, &x)) + goto InvalidInstruction; + + if (!checkGpType(o1, opData.bType)) + goto InvalidInstruction; + + if (!checkGpType(o2, opData.cType)) + goto InvalidInstruction; + + if (!checkGpType(o3, opData.dType)) + goto InvalidInstruction; + + if (opData.uniform && !checkSignature(o0, o1, o2, o3)) + goto InvalidInstruction; + + if (!checkGpId(o0, opData.aHiId)) + goto InvalidPhysId; + + if (!checkGpId(o1, opData.bHiId)) + goto InvalidPhysId; + + if (!checkGpId(o2, opData.cHiId)) + goto InvalidPhysId; + + if (!checkGpId(o3, opData.dHiId)) + goto InvalidPhysId; + + opcode.reset(opData.opcode()); + opcode.addImm(x, 31); + opcode.addReg(o2, 16); + opcode.addReg(o3, 10); + opcode.addReg(o1, 5); + opcode.addReg(o0, 0); + goto EmitOp; + } + + break; + } + + case InstDB::kEncodingBaseRRII: { + const InstDB::EncodingData::BaseRRII& opData = InstDB::EncodingData::baseRRII[encodingIndex]; + + if (isign4 == ENC_OPS4(Reg, Reg, Imm, Imm)) { + if (!checkGpType(o0, opData.aType)) + goto InvalidInstruction; + + if (!checkGpType(o1, opData.bType)) + goto InvalidInstruction; + + if (!checkGpId(o0, opData.aHiId)) + goto InvalidPhysId; + + if (!checkGpId(o1, opData.bHiId)) + goto InvalidPhysId; + + if (o2.as().valueAs() >= Support::bitMask(opData.aImmSize + opData.aImmDiscardLsb) || + o3.as().valueAs() >= Support::bitMask(opData.bImmSize + opData.bImmDiscardLsb)) + goto InvalidImmediate; + + uint32_t aImm = o2.as().valueAs() >> opData.aImmDiscardLsb; + uint32_t bImm = o3.as().valueAs() >> opData.bImmDiscardLsb; + + if ((aImm << opData.aImmDiscardLsb) != o2.as().valueAs() || + (bImm << opData.bImmDiscardLsb) != o3.as().valueAs()) + goto InvalidImmediate; + + opcode.reset(opData.opcode()); + opcode.addImm(aImm, opData.aImmOffset); + opcode.addImm(bImm, opData.bImmOffset); + opcode.addReg(o1, 5); + opcode.addReg(o0, 0); + goto EmitOp; + } + + break; + } + + // ------------------------------------------------------------------------ + // [Base - Mov] + // ------------------------------------------------------------------------ + + case InstDB::kEncodingBaseMov: { + // MOV is a pseudo instruction that uses various instructions depending on its signature. + uint32_t x = diff(o0.as().type(), RegType::kARM_GpW); + if (x > 1) + goto InvalidInstruction; + + if (isign4 == ENC_OPS2(Reg, Reg)) { + if (!o0.as().isGp()) + goto InvalidInstruction; + + if (!checkSignature(o0, o1)) + goto InvalidInstruction; + + bool hasSP = o0.as().isSP() || o1.as().isSP(); + if (hasSP) { + // Cannot be combined with ZR. + if (!checkGpId(o0, o1, kSP)) + goto InvalidPhysId; + + // MOV Rd, Rm -> ADD Rd, Rn, #0. + opcode.reset(0b00010001000000000000000000000000); + opcode.addImm(x, 31); + opcode.addReg(o1, 5); + opcode.addReg(o0, 0); + goto EmitOp; + } + else { + if (!checkGpId(o0, o1, kZR)) + goto InvalidPhysId; + + // MOV Rd, Rm -> ORR Rd, , Rm. 
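+          // (the Rn field of the opcode below is preset to 31, i.e. the zero
+          // register, so this emits ORR Rd, ZR, Rm).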
+ opcode.reset(0b00101010000000000000001111100000); + opcode.addImm(x, 31); + opcode.addReg(o1, 16); + opcode.addReg(o0, 0); + goto EmitOp; + } + } + + if (isign4 == ENC_OPS2(Reg, Imm)) { + if (!o0.as().isGp()) + goto InvalidInstruction; + + uint64_t immValue = o1.as().valueAs(); + if (!x) + immValue &= 0xFFFFFFFFu; + + // Prefer a single MOVN/MOVZ instruction over a logical instruction. + multipleOpCount = encodeMovSequence64(multipleOpData, immValue, o0.id() & 31, x); + if (multipleOpCount == 1 && !o0.as().isSP()) { + opcode.reset(multipleOpData[0]); + goto EmitOp; + } + + // Logical instructions use 13-bit immediate pattern encoded as N:ImmR:ImmS. + LogicalImm logicalImm; + if (!o0.as().isZR()) { + if (Utils::encodeLogicalImm(immValue, x ? 64 : 32, &logicalImm)) { + if (!checkGpId(o0, kSP)) + goto InvalidPhysId; + + opcode.reset(0b00110010000000000000001111100000); + opcode.addImm(x, 31); + opcode.addLogicalImm(logicalImm); + opcode.addReg(o0, 0); + goto EmitOp; + } + } + + if (!checkGpId(o0, kZR)) + goto InvalidPhysId; + + goto EmitOp_Multiple; + } + + break; + } + + case InstDB::kEncodingBaseMovKNZ: { + const InstDB::EncodingData::BaseMovKNZ& opData = InstDB::EncodingData::baseMovKNZ[encodingIndex]; + + uint32_t x = diff(o0.as().type(), RegType::kARM_GpW); + if (x > 1) + goto InvalidInstruction; + + if (!checkGpId(o0, kZR)) + goto InvalidPhysId; + + opcode.reset(opData.opcode); + opcode.addImm(x, 31); + + if (isign4 == ENC_OPS2(Reg, Imm)) { + uint64_t imm16 = o1.as().valueAs(); + if (imm16 > 0xFFFFu) + goto InvalidImmediate; + + opcode.addImm(imm16, 5); + opcode.addReg(o0, 0); + goto EmitOp; + } + + if (isign4 == ENC_OPS3(Reg, Imm, Imm)) { + uint64_t imm16 = o1.as().valueAs(); + uint32_t shiftType = o2.as().predicate(); + uint64_t shiftValue = o2.as().valueAs(); + + if (imm16 > 0xFFFFu || shiftValue > 48 || shiftType != uint32_t(ShiftOp::kLSL)) + goto InvalidImmediate; + + // Convert shift value to 'hw' field. + uint32_t hw = uint32_t(shiftValue) >> 4; + if ((hw << 4) != uint32_t(shiftValue)) + goto InvalidImmediate; + + opcode.addImm(hw, 21); + opcode.addImm(imm16, 5); + opcode.addReg(o0, 0); + + if (!x && hw > 1u) + goto InvalidImmediate; + + goto EmitOp; + } + + break; + } + + // ------------------------------------------------------------------------ + // [Base - Adr] + // ------------------------------------------------------------------------ + + case InstDB::kEncodingBaseAdr: { + const InstDB::EncodingData::BaseAdr& opData = InstDB::EncodingData::baseAdr[encodingIndex]; + + if (isign4 == ENC_OPS2(Reg, Label) || isign4 == ENC_OPS2(Reg, Imm)) { + if (!o0.as().isGpX()) + goto InvalidInstruction; + + if (!checkGpId(o0, kZR)) + goto InvalidPhysId; + + opcode.reset(opData.opcode()); + opcode.addReg(o0, 0); + offsetFormat.resetToImmValue(opData.offsetType, 4, 5, 21, 0); + + if (instId == Inst::kIdAdrp) + offsetFormat._immDiscardLsb = 12; + + rmRel = &o1; + goto EmitOp_Rel; + } + + break; + } + + // ------------------------------------------------------------------------ + // [Base - Arithmetic and Logical] + // ------------------------------------------------------------------------ + + case InstDB::kEncodingBaseAddSub: { + const InstDB::EncodingData::BaseAddSub& opData = InstDB::EncodingData::baseAddSub[encodingIndex]; + + uint32_t x; + if (!checkGpType(o0, o1, kWX, &x)) + goto InvalidInstruction; + + if (isign4 == ENC_OPS3(Reg, Reg, Imm) || isign4 == ENC_OPS4(Reg, Reg, Imm, Imm)) { + opcode.reset(uint32_t(opData.immediateOp) << 24); + + // ADD | SUB (immediate) - ZR is not allowed. 
+ // ADDS|SUBS (immediate) - ZR allowed in Rd, SP allowed in Rn. + uint32_t aHiId = opcode.get() & B(29) ? kZR : kSP; + uint32_t bHiId = kSP; + + if (!checkGpId(o0, aHiId) || !checkGpId(o1, bHiId)) + goto InvalidPhysId; + + // ADD|SUB (immediate) use 12-bit immediate optionally shifted by 'LSL #12'. + uint64_t imm = o2.as().valueAs(); + uint32_t shift = 0; + + if (isign4 == ENC_OPS4(Reg, Reg, Imm, Imm)) { + if (o3.as().predicate() != uint32_t(ShiftOp::kLSL)) + goto InvalidImmediate; + + if (o3.as().value() != 0 && o3.as().value() != 12) + goto InvalidImmediate; + + shift = uint32_t(o3.as().value() != 0); + } + + // Accept immediate value of '0x00XXX000' by setting 'shift' to 12. + if (imm > 0xFFFu) { + if (shift || (imm & ~uint64_t(0xFFFu << 12)) != 0) + goto InvalidImmediate; + shift = 1; + imm >>= 12; + } + + opcode.addImm(x, 31); + opcode.addImm(shift, 22); + opcode.addImm(imm, 10); + opcode.addReg(o1, 5); + opcode.addReg(o0, 0); + goto EmitOp; + } + + if (isign4 == ENC_OPS3(Reg, Reg, Reg) || isign4 == ENC_OPS4(Reg, Reg, Reg, Imm)) { + if (!checkSignature(o1, o2)) + goto InvalidInstruction; + + uint32_t opSize = x ? 64 : 32; + uint64_t shift = 0; + uint32_t sType = uint32_t(ShiftOp::kLSL); + + if (isign4 == ENC_OPS4(Reg, Reg, Reg, Imm)) { + sType = o3.as().predicate(); + shift = o3.as().valueAs(); + } + + if (!checkGpId(o2, kZR)) + goto InvalidPhysId; + + // Shift operation - LSL, LSR, ASR. + if (sType <= uint32_t(ShiftOp::kASR)) { + bool hasSP = o0.as().isSP() || o1.as().isSP(); + if (!hasSP) { + if (!checkGpId(o0, o1, kZR)) + goto InvalidPhysId; + + if (shift >= opSize) + goto InvalidImmediate; + + opcode.reset(uint32_t(opData.shiftedOp) << 21); + opcode.addImm(x, 31); + opcode.addImm(sType, 22); + opcode.addReg(o2, 16); + opcode.addImm(shift, 10); + opcode.addReg(o1, 5); + opcode.addReg(o0, 0); + goto EmitOp; + } + + // SP register can only be used with LSL or Extend. + if (sType != uint32_t(ShiftOp::kLSL)) + goto InvalidImmediate; + sType = x ? uint32_t(ShiftOp::kUXTX) : uint32_t(ShiftOp::kUXTW); + } + + // Extend operation - UXTB, UXTH, UXTW, UXTX, SXTB, SXTH, SXTW, SXTX. + opcode.reset(uint32_t(opData.extendedOp) << 21); + sType -= uint32_t(ShiftOp::kUXTB); + + if (sType > 7 || shift > 4) + goto InvalidImmediate; + + if (!(opcode.get() & B(29))) { + // ADD|SUB (extend) - ZR is not allowed. + if (!checkGpId(o0, o1, kSP)) + goto InvalidPhysId; + } + else { + // ADDS|SUBS (extend) - ZR allowed in Rd, SP allowed in Rn. + if (!checkGpId(o0, kZR) || !checkGpId(o1, kSP)) + goto InvalidPhysId; + } + + opcode.addImm(x, 31); + opcode.addReg(o2, 16); + opcode.addImm(sType, 13); + opcode.addImm(shift, 10); + opcode.addReg(o1, 5); + opcode.addReg(o0, 0); + goto EmitOp; + } + + break; + } + + case InstDB::kEncodingBaseLogical: { + const InstDB::EncodingData::BaseLogical& opData = InstDB::EncodingData::baseLogical[encodingIndex]; + + uint32_t x; + if (!checkGpType(o0, o1, kWX, &x)) + goto InvalidInstruction; + + if (!checkSignature(o0, o1)) + goto InvalidInstruction; + + uint32_t opSize = x ? 64 : 32; + + if (isign4 == ENC_OPS3(Reg, Reg, Imm) && opData.immediateOp != 0) { + opcode.reset(uint32_t(opData.immediateOp) << 23); + + // AND|ANDS|BIC|BICS|ORR|EOR (immediate) uses a LogicalImm format described by N:R:S values. + uint64_t immMask = Support::lsbMask(opSize); + uint64_t immValue = o2.as().valueAs(); + + if (opData.negateImm) + immValue ^= immMask; + + // Logical instructions use 13-bit immediate pattern encoded as N:ImmS:ImmR. 
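+        // Only values that are a rotated, replicated run of ones can be
+        // encoded; e.g. 0x00FF00FF00FF00FF is representable, while 0x12345678
+        // is not and ends up at InvalidImmediate.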
+ LogicalImm logicalImm; + if (!Utils::encodeLogicalImm(immValue & immMask, opSize, &logicalImm)) + goto InvalidImmediate; + + // AND|BIC|ORR|EOR (immediate) can have SP on destination, but ANDS|BICS (immediate) cannot. + uint32_t kOpANDS = 0x3 << 29; + bool isANDS = (opcode.get() & kOpANDS) == kOpANDS; + + if (!checkGpId(o0, isANDS ? kZR : kSP) || !checkGpId(o1, kZR)) + goto InvalidPhysId; + + opcode.addImm(x, 31); + opcode.addLogicalImm(logicalImm); + opcode.addReg(o1, 5); + opcode.addReg(o0, 0); + goto EmitOp; + } + + if (!checkSignature(o1, o2)) + goto InvalidInstruction; + + if (isign4 == ENC_OPS3(Reg, Reg, Reg)) { + if (!checkGpId(o0, o1, o2, kZR)) + goto InvalidPhysId; + + opcode.reset(uint32_t(opData.shiftedOp) << 21); + opcode.addImm(x, 31); + opcode.addReg(o2, 16); + opcode.addReg(o1, 5); + opcode.addReg(o0, 0); + goto EmitOp; + } + + if (isign4 == ENC_OPS4(Reg, Reg, Reg, Imm)) { + if (!checkGpId(o0, o1, o2, kZR)) + goto InvalidPhysId; + + uint32_t shiftType = o3.as().predicate(); + uint64_t opShift = o3.as().valueAs(); + + if (shiftType > 0x3 || opShift >= opSize) + goto InvalidImmediate; + + opcode.reset(uint32_t(opData.shiftedOp) << 21); + opcode.addImm(x, 31); + opcode.addImm(shiftType, 22); + opcode.addReg(o2, 16); + opcode.addImm(opShift, 10); + opcode.addReg(o1, 5); + opcode.addReg(o0, 0); + goto EmitOp; + } + + break; + } + + case InstDB::kEncodingBaseCmpCmn: { + const InstDB::EncodingData::BaseCmpCmn& opData = InstDB::EncodingData::baseCmpCmn[encodingIndex]; + + uint32_t x; + if (!checkGpType(o0, kWX, &x)) + goto InvalidInstruction; + + if (isign4 == ENC_OPS2(Reg, Imm)) { + // CMN|CMP (immediate) - ZR is not allowed. + if (!checkGpId(o0, kSP)) + goto InvalidPhysId; + + // CMN|CMP (immediate) use 12-bit immediate optionally shifted by 'LSL #12'. + const Imm& imm12 = o1.as(); + uint32_t immShift = 0; + uint64_t immValue = imm12.valueAs(); + + if (immValue > 0xFFFu) { + if ((immValue & ~uint64_t(0xFFFu << 12)) != 0) + goto InvalidImmediate; + immShift = 1; + immValue >>= 12; + } + + opcode.reset(uint32_t(opData.immediateOp) << 24); + opcode.addImm(x, 31); + opcode.addImm(immShift, 22); + opcode.addImm(immValue, 10); + opcode.addReg(o0, 5); + opcode.addReg(Gp::kIdZr, 0); + goto EmitOp; + } + + if (isign4 == ENC_OPS2(Reg, Reg) || isign4 == ENC_OPS3(Reg, Reg, Imm)) { + if (!checkSignature(o0, o1)) + goto InvalidInstruction; + + uint32_t opSize = x ? 64 : 32; + uint32_t sType = 0; + uint64_t shift = 0; + + if (isign4 == ENC_OPS3(Reg, Reg, Imm)) { + sType = o2.as().predicate(); + shift = o2.as().valueAs(); + } + + bool hasSP = o0.as().isSP() || o1.as().isSP(); + + // Shift operation - LSL, LSR, ASR. + if (sType <= uint32_t(ShiftOp::kASR)) { + if (!hasSP) { + if (shift >= opSize) + goto InvalidImmediate; + + opcode.reset(uint32_t(opData.shiftedOp) << 21); + opcode.addImm(x, 31); + opcode.addImm(sType, 22); + opcode.addReg(o1, 16); + opcode.addImm(shift, 10); + opcode.addReg(o0, 5); + opcode.addReg(Gp::kIdZr, 0); + goto EmitOp; + } + + // SP register can only be used with LSL or Extend. + if (sType != uint32_t(ShiftOp::kLSL)) + goto InvalidImmediate; + + sType = x ? uint32_t(ShiftOp::kUXTX) : uint32_t(ShiftOp::kUXTW); + } + + // Extend operation - UXTB, UXTH, UXTW, UXTX, SXTB, SXTH, SXTW, SXTX. 
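+        // Rebase the extend kind to 0..7 so it fits the 3-bit 'option' field
+        // added at bit 13; the accompanying shift amount is limited to 4.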
+ sType -= uint32_t(ShiftOp::kUXTB); + if (sType > 7 || shift > 4) + goto InvalidImmediate; + + opcode.reset(uint32_t(opData.extendedOp) << 21); + opcode.addImm(x, 31); + opcode.addReg(o1, 16); + opcode.addImm(sType, 13); + opcode.addImm(shift, 10); + opcode.addReg(o0, 5); + opcode.addReg(Gp::kIdZr, 0); + goto EmitOp; + } + + break; + } + + case InstDB::kEncodingBaseMvnNeg: { + const InstDB::EncodingData::BaseMvnNeg& opData = InstDB::EncodingData::baseMvnNeg[encodingIndex]; + + uint32_t x; + if (!checkGpType(o0, o1, kWX, &x)) + goto InvalidInstruction; + + opcode.reset(opData.opcode); + opcode.addImm(x, 31); + opcode.addReg(o1, 16); + opcode.addReg(o0, 0); + + if (isign4 == ENC_OPS2(Reg, Reg)) { + if (!checkGpId(o0, o1, kZR)) + goto InvalidPhysId; + + goto EmitOp; + } + + if (isign4 == ENC_OPS3(Reg, Reg, Imm)) { + if (!checkGpId(o0, o1, kZR)) + goto InvalidPhysId; + + uint32_t opSize = x ? 64 : 32; + uint32_t shiftType = o2.as().predicate(); + uint64_t opShift = o2.as().valueAs(); + + if (shiftType > uint32_t(ShiftOp::kROR) || opShift >= opSize) + goto InvalidImmediate; + + opcode.addImm(shiftType, 22); + opcode.addImm(opShift, 10); + goto EmitOp; + } + + break; + } + + case InstDB::kEncodingBaseTst: { + const InstDB::EncodingData::BaseTst& opData = InstDB::EncodingData::baseTst[encodingIndex]; + + uint32_t x; + if (!checkGpType(o0, kWX, &x)) + goto InvalidInstruction; + + uint32_t opSize = x ? 64 : 32; + + if (isign4 == ENC_OPS2(Reg, Imm) && opData.immediateOp != 0) { + if (!checkGpId(o0, kZR)) + goto InvalidPhysId; + + // TST (immediate) uses a LogicalImm format described by N:R:S values. + uint64_t immMask = Support::lsbMask(opSize); + uint64_t immValue = o1.as().valueAs(); + + // Logical instructions use 13-bit immediate pattern encoded as N:ImmS:ImmR. + LogicalImm logicalImm; + if (!Utils::encodeLogicalImm(immValue & immMask, opSize, &logicalImm)) + goto InvalidImmediate; + + opcode.reset(uint32_t(opData.immediateOp) << 22); + opcode.addLogicalImm(logicalImm); + opcode.addImm(x, 31); + opcode.addReg(o0, 5); + opcode.addReg(Gp::kIdZr, 0); + goto EmitOp; + } + + opcode.reset(uint32_t(opData.shiftedOp) << 21); + opcode.addImm(x, 31); + opcode.addReg(o1, 16); + opcode.addReg(o0, 5); + opcode.addReg(Gp::kIdZr, 0); + + if (isign4 == ENC_OPS2(Reg, Reg)) { + if (!checkGpId(o0, o1, kZR)) + goto InvalidPhysId; + + goto EmitOp; + } + + if (isign4 == ENC_OPS3(Reg, Reg, Imm)) { + if (!checkGpId(o0, o1, kZR)) + goto InvalidPhysId; + + uint32_t shiftType = o2.as().predicate(); + uint64_t opShift = o2.as().valueAs(); + + if (shiftType > 0x3 || opShift >= opSize) + goto InvalidImmediate; + + opcode.addImm(shiftType, 22); + opcode.addImm(opShift, 10); + goto EmitOp; + } + + break; + } + + // ------------------------------------------------------------------------ + // [Base - Bit Manipulation] + // ------------------------------------------------------------------------ + + case InstDB::kEncodingBaseBfc: { + const InstDB::EncodingData::BaseBfc& opData = InstDB::EncodingData::baseBfc[encodingIndex]; + + if (isign4 == ENC_OPS3(Reg, Imm, Imm)) { + uint32_t x; + if (!checkGpType(o0, InstDB::kWX, &x)) + goto InvalidInstruction; + + if (!checkGpId(o0)) + goto InvalidPhysId; + + uint64_t lsb = o1.as().valueAs(); + uint64_t width = o2.as().valueAs(); + uint32_t opSize = x ? 
64 : 32; + + if (lsb >= opSize || width == 0 || width > opSize) + goto InvalidImmediate; + + uint32_t lsb32 = Support::neg(uint32_t(lsb)) & (opSize - 1); + uint32_t width32 = uint32_t(width) - 1; + + opcode.reset(opData.opcode); + opcode.addImm(x, 31); + opcode.addImm(x, 22); + opcode.addImm(lsb32, 16); + opcode.addImm(width32, 10); + opcode.addReg(o0, 0); + goto EmitOp; + } + + break; + } + + case InstDB::kEncodingBaseBfi: { + const InstDB::EncodingData::BaseBfi& opData = InstDB::EncodingData::baseBfi[encodingIndex]; + + if (isign4 == ENC_OPS4(Reg, Reg, Imm, Imm)) { + uint32_t x; + if (!checkGpType(o0, InstDB::kWX, &x)) + goto InvalidInstruction; + + if (!checkSignature(o0, o1)) + goto InvalidInstruction; + + if (!checkGpId(o0, o1)) + goto InvalidPhysId; + + uint64_t lsb = o2.as().valueAs(); + uint64_t width = o3.as().valueAs(); + uint32_t opSize = x ? 64 : 32; + + if (lsb >= opSize || width == 0 || width > opSize) + goto InvalidImmediate; + + uint32_t lImm = Support::neg(uint32_t(lsb)) & (opSize - 1); + uint32_t wImm = uint32_t(width) - 1; + + opcode.reset(opData.opcode); + opcode.addImm(x, 31); + opcode.addImm(x, 22); + opcode.addImm(lImm, 16); + opcode.addImm(wImm, 10); + opcode.addReg(o1, 5); + opcode.addReg(o0, 0); + goto EmitOp; + } + + break; + } + + case InstDB::kEncodingBaseBfm: { + const InstDB::EncodingData::BaseBfm& opData = InstDB::EncodingData::baseBfm[encodingIndex]; + + if (isign4 == ENC_OPS4(Reg, Reg, Imm, Imm)) { + uint32_t x; + if (!checkGpType(o0, InstDB::kWX, &x)) + goto InvalidInstruction; + + if (!checkSignature(o0, o1)) + goto InvalidInstruction; + + if (!checkGpId(o0, o1)) + goto InvalidPhysId; + + uint64_t immR = o2.as().valueAs(); + uint64_t immS = o3.as().valueAs(); + uint32_t opSize = x ? 64 : 32; + + if ((immR | immS) >= opSize) + goto InvalidImmediate; + + opcode.reset(opData.opcode); + opcode.addImm(x, 31); + opcode.addImm(x, 22); + opcode.addImm(immR, 16); + opcode.addImm(immS, 10); + opcode.addReg(o1, 5); + opcode.addReg(o0, 0); + goto EmitOp; + } + + break; + } + + case InstDB::kEncodingBaseBfx: { + const InstDB::EncodingData::BaseBfx& opData = InstDB::EncodingData::baseBfx[encodingIndex]; + + if (isign4 == ENC_OPS4(Reg, Reg, Imm, Imm)) { + uint32_t x; + if (!checkGpType(o0, InstDB::kWX, &x)) + goto InvalidInstruction; + + if (!checkSignature(o0, o1)) + goto InvalidInstruction; + + if (!checkGpId(o0, o1)) + goto InvalidPhysId; + + uint64_t lsb = o2.as().valueAs(); + uint64_t width = o3.as().valueAs(); + uint32_t opSize = x ? 
64 : 32; + + if (lsb >= opSize || width == 0 || width > opSize) + goto InvalidImmediate; + + uint32_t lsb32 = uint32_t(lsb); + uint32_t width32 = lsb32 + uint32_t(width) - 1u; + + if (width32 >= opSize) + goto InvalidImmediate; + + opcode.reset(opData.opcode); + opcode.addImm(x, 31); + opcode.addImm(x, 22); + opcode.addImm(lsb32, 16); + opcode.addImm(width32, 10); + opcode.addReg(o1, 5); + opcode.addReg(o0, 0); + goto EmitOp; + } + + break; + } + + case InstDB::kEncodingBaseExtend: { + const InstDB::EncodingData::BaseExtend& opData = InstDB::EncodingData::baseExtend[encodingIndex]; + + if (isign4 == ENC_OPS2(Reg, Reg)) { + uint32_t x; + if (!checkGpType(o0, opData.rType, &x)) + goto InvalidInstruction; + + if (!o1.as().isGpW()) + goto InvalidInstruction; + + if (!checkGpId(o0, o1)) + goto InvalidPhysId; + + opcode.reset(opData.opcode()); + opcode.addImm(x, 31); + opcode.addImm(x, 22); + opcode.addReg(o1, 5); + opcode.addReg(o0, 0); + goto EmitOp; + } + + break; + } + + case InstDB::kEncodingBaseExtract: { + const InstDB::EncodingData::BaseExtract& opData = InstDB::EncodingData::baseExtract[encodingIndex]; + + if (isign4 == ENC_OPS4(Reg, Reg, Reg, Imm)) { + uint32_t x; + if (!checkGpType(o0, kWX, &x)) + goto InvalidInstruction; + + if (!checkSignature(o0, o1, o2)) + goto InvalidInstruction; + + if (!checkGpId(o0, o1, o2)) + goto InvalidPhysId; + + uint64_t lsb = o3.as().valueAs(); + uint32_t opSize = x ? 64 : 32; + + if (lsb >= opSize) + goto InvalidImmediate; + + opcode.reset(opData.opcode); + opcode.addImm(x, 31); + opcode.addImm(x, 22); + opcode.addReg(o2, 16); + opcode.addImm(lsb, 10); + opcode.addReg(o1, 5); + opcode.addReg(o0, 0); + goto EmitOp; + } + + break; + } + + case InstDB::kEncodingBaseRev: { + if (isign4 == ENC_OPS2(Reg, Reg)) { + uint32_t x; + if (!checkGpType(o0, InstDB::kWX, &x)) + goto InvalidInstruction; + + if (!checkSignature(o0, o1)) + goto InvalidInstruction; + + if (!checkGpId(o0, o1)) + goto InvalidPhysId; + + opcode.reset(0b01011010110000000000100000000000); + opcode.addImm(x, 31); + opcode.addImm(x, 10); + opcode.addReg(o1, 5); + opcode.addReg(o0, 0); + goto EmitOp; + } + + break; + } + + case InstDB::kEncodingBaseShift: { + const InstDB::EncodingData::BaseShift& opData = InstDB::EncodingData::baseShift[encodingIndex]; + + uint32_t x; + if (!checkGpType(o0, kWX, &x)) + goto InvalidInstruction; + + if (isign4 == ENC_OPS3(Reg, Reg, Reg)) { + if (!checkSignature(o0, o1, o2)) + goto InvalidInstruction; + + if (!checkGpId(o0, o1, o2, kZR)) + goto InvalidPhysId; + + opcode.reset(opData.registerOp()); + opcode.addImm(x, 31); + opcode.addReg(o2, 16); + opcode.addReg(o1, 5); + opcode.addReg(o0, 0); + goto EmitOp; + } + + if (isign4 == ENC_OPS3(Reg, Reg, Imm) && opData.immediateOp()) { + if (!checkSignature(o0, o1)) + goto InvalidInstruction; + + if (!checkGpId(o0, o1, kZR)) + goto InvalidPhysId; + + uint64_t immR = o2.as().valueAs(); + uint32_t opSize = x ? 64 : 32; + + if (immR >= opSize) + goto InvalidImmediate; + + opcode.reset(opData.immediateOp()); + opcode.addImm(x, 31); + opcode.addImm(x, 22); + opcode.addReg(o1, 5); + opcode.addReg(o0, 0); + + if (opcode.get() & B(10)) { + // ASR and LSR (immediate) has the same logic. 
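+          // (ASR and LSR (immediate) are aliases of SBFM/UBFM with
+          // immr = shift and imms = opSize - 1, which is why only the shift
+          // amount and the 'x' width bit need to be merged in here).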
+ opcode.addImm(x, 15); + opcode.addImm(immR, 16); + goto EmitOp; + } + + if (opData.ror == 0) { + // LSL (immediate) is an alias to UBFM + uint32_t ubfmImmR = Support::neg(uint32_t(immR)) & (opSize - 1); + uint32_t ubfmImmS = opSize - 1 - uint32_t(immR); + + opcode.addImm(ubfmImmR, 16); + opcode.addImm(ubfmImmS, 10); + goto EmitOp; + } + else { + // ROR (immediate) is an alias to EXTR. + opcode.addImm(immR, 10); + opcode.addReg(o1, 16); + goto EmitOp; + } + } + + break; + } + + // ------------------------------------------------------------------------ + // [Base - Conditionals] + // ------------------------------------------------------------------------ + + case InstDB::kEncodingBaseCCmp: { + const InstDB::EncodingData::BaseCCmp& opData = InstDB::EncodingData::baseCCmp[encodingIndex]; + + if (isign4 == ENC_OPS4(Reg, Reg, Imm, Imm) || isign4 == ENC_OPS4(Reg, Imm, Imm, Imm)) { + uint32_t x; + if (!checkGpType(o0, InstDB::kWX, &x)) + goto InvalidInstruction; + + if (!checkGpId(o0, kZR)) + goto InvalidPhysId; + + uint64_t nzcv = o2.as().valueAs(); + uint64_t cond = o3.as().valueAs(); + + if ((nzcv | cond) > 0xFu) + goto InvalidImmediate; + + opcode.reset(opData.opcode); + opcode.addImm(x, 31); + opcode.addImm(condCodeToOpcodeCond(uint32_t(cond)), 12); + opcode.addImm(nzcv, 0); + + if (isign4 == ENC_OPS4(Reg, Reg, Imm, Imm)) { + // CCMN|CCMP (register) form. + if (!checkSignature(o0, o1)) + goto InvalidInstruction; + + if (!checkGpId(o1, kZR)) + goto InvalidPhysId; + + opcode.addReg(o1, 16); + opcode.addReg(o0, 5); + goto EmitOp; + } + else { + // CCMN|CCMP (immediate) form. + uint64_t imm5 = o1.as().valueAs(); + if (imm5 > 0x1F) + goto InvalidImmediate; + + opcode.addImm(1, 11); + opcode.addImm(imm5, 16); + opcode.addReg(o0, 5); + goto EmitOp; + } + } + + break; + } + + case InstDB::kEncodingBaseCInc: { + const InstDB::EncodingData::BaseCInc& opData = InstDB::EncodingData::baseCInc[encodingIndex]; + + if (isign4 == ENC_OPS3(Reg, Reg, Imm)) { + uint32_t x; + if (!checkGpType(o0, o1, InstDB::kWX, &x)) + goto InvalidInstruction; + + if (!checkGpId(o0, o1, kZR)) + goto InvalidPhysId; + + uint64_t cond = o2.as().valueAs(); + if (cond - 2u > 0xEu) + goto InvalidImmediate; + + opcode.reset(opData.opcode); + opcode.addImm(x, 31); + opcode.addReg(o1, 16); + opcode.addImm(condCodeToOpcodeCond(uint32_t(cond)) ^ 1u, 12); + opcode.addReg(o1, 5); + opcode.addReg(o0, 0); + goto EmitOp; + } + + break; + } + + case InstDB::kEncodingBaseCSel: { + const InstDB::EncodingData::BaseCSel& opData = InstDB::EncodingData::baseCSel[encodingIndex]; + + if (isign4 == ENC_OPS4(Reg, Reg, Reg, Imm)) { + uint32_t x; + if (!checkGpType(o0, o1, o2, InstDB::kWX, &x)) + goto InvalidInstruction; + + if (!checkGpId(o0, o1, o2, kZR)) + goto InvalidPhysId; + + uint64_t cond = o3.as().valueAs(); + if (cond > 0xFu) + goto InvalidImmediate; + + opcode.reset(opData.opcode); + opcode.addImm(x, 31); + opcode.addReg(o2, 16); + opcode.addImm(condCodeToOpcodeCond(uint32_t(cond)), 12); + opcode.addReg(o1, 5); + opcode.addReg(o0, 0); + goto EmitOp; + } + + break; + } + + case InstDB::kEncodingBaseCSet: { + const InstDB::EncodingData::BaseCSet& opData = InstDB::EncodingData::baseCSet[encodingIndex]; + + if (isign4 == ENC_OPS2(Reg, Imm)) { + uint32_t x; + if (!checkGpType(o0, InstDB::kWX, &x)) + goto InvalidInstruction; + + if (!checkGpId(o0, kZR)) + goto InvalidPhysId; + + uint64_t cond = o1.as().valueAs(); + if (cond - 2u >= 0xEu) + goto InvalidImmediate; + + opcode.reset(opData.opcode); + opcode.addImm(x, 31); + 
opcode.addImm(condCodeToOpcodeCond(uint32_t(cond)) ^ 1u, 12); + opcode.addReg(o0, 0); + goto EmitOp; + } + + break; + } + + // ------------------------------------------------------------------------ + // [Base - Special] + // ------------------------------------------------------------------------ + + case InstDB::kEncodingBaseAtDcIcTlbi: { + const InstDB::EncodingData::BaseAtDcIcTlbi& opData = InstDB::EncodingData::baseAtDcIcTlbi[encodingIndex]; + + if (isign4 == ENC_OPS1(Imm) || isign4 == ENC_OPS2(Imm, Reg)) { + if (opData.mandatoryReg && isign4 != ENC_OPS2(Imm, Reg)) + goto InvalidInstruction; + + if (o0.as().valueAs() > 0x7FFFu) + goto InvalidImmediate; + + uint32_t imm = o0.as().valueAs(); + if ((imm & opData.immVerifyMask) != opData.immVerifyData) + goto InvalidImmediate; + + uint32_t rt = 31; + if (o1.isReg()) { + if (!o1.as().isGpX()) + goto InvalidInstruction; + + if (!checkGpId(o1, kZR)) + goto InvalidPhysId; + + rt = o1.id() & 31; + } + + opcode.reset(0b11010101000010000000000000000000); + opcode.addImm(imm, 5); + opcode.addReg(rt, 0); + goto EmitOp; + } + break; + } + + case InstDB::kEncodingBaseMrs: { + if (isign4 == ENC_OPS2(Reg, Imm)) { + if (!o0.as().isGpX()) + goto InvalidInstruction; + + if (!checkGpId(o0, kZR)) + goto InvalidPhysId; + + if (o1.as().valueAs() > 0xFFFFu) + goto InvalidImmediate; + + uint32_t imm = o1.as().valueAs(); + if (!(imm & B(15))) + goto InvalidImmediate; + + opcode.reset(0b11010101001100000000000000000000); + opcode.addImm(imm, 5); + opcode.addReg(o0, 0); + goto EmitOp; + } + + break; + } + + case InstDB::kEncodingBaseMsr: { + if (isign4 == ENC_OPS2(Imm, Reg)) { + if (!o1.as().isGpX()) + goto InvalidInstruction; + + if (o0.as().valueAs() > 0xFFFFu) + goto InvalidImmediate; + + uint32_t imm = o0.as().valueAs(); + if (!(imm & B(15))) + goto InvalidImmediate; + + if (!checkGpId(o1, kZR)) + goto InvalidPhysId; + + opcode.reset(0b11010101000100000000000000000000); + opcode.addImm(imm, 5); + opcode.addReg(o1, 0); + goto EmitOp; + } + + if (isign4 == ENC_OPS2(Imm, Imm)) { + if (o0.as().valueAs() > 0x1Fu) + goto InvalidImmediate; + + if (o1.as().valueAs() > 0xFu) + goto InvalidImmediate; + + uint32_t op = o0.as().valueAs(); + uint32_t cRm = o1.as().valueAs(); + + uint32_t op1 = uint32_t(op) >> 3; + uint32_t op2 = uint32_t(op) & 0x7u; + + opcode.reset(0b11010101000000000100000000011111); + opcode.addImm(op1, 16); + opcode.addImm(cRm, 8); + opcode.addImm(op2, 5); + goto EmitOp; + } + + break; + } + + case InstDB::kEncodingBaseSys: { + if (isign4 == ENC_OPS4(Imm, Imm, Imm, Imm)) { + if (o0.as().valueAs() > 0x7u || + o1.as().valueAs() > 0xFu || + o2.as().valueAs() > 0xFu || + o3.as().valueAs() > 0x7u) + goto InvalidImmediate; + + uint32_t op1 = o0.as().valueAs(); + uint32_t cRn = o1.as().valueAs(); + uint32_t cRm = o2.as().valueAs(); + uint32_t op2 = o3.as().valueAs(); + uint32_t rt = 31; + + const Operand_& o4 = opExt[EmitterUtils::kOp4]; + if (o4.isReg()) { + if (!o4.as().isGpX()) + goto InvalidInstruction; + + if (!checkGpId(o4, kZR)) + goto InvalidPhysId; + + rt = o4.id() & 31; + } + else if (!o4.isNone()) { + goto InvalidInstruction; + } + + opcode.reset(0b11010101000010000000000000000000); + opcode.addImm(op1, 16); + opcode.addImm(cRn, 12); + opcode.addImm(cRm, 8); + opcode.addImm(op2, 5); + opcode.addImm(rt, 0); + goto EmitOp; + } + + break; + } + + // ------------------------------------------------------------------------ + // [Base - Branch] + // ------------------------------------------------------------------------ + + case 
InstDB::kEncodingBaseBranchReg: { + const InstDB::EncodingData::BaseBranchReg& opData = InstDB::EncodingData::baseBranchReg[encodingIndex]; + + if (isign4 == ENC_OPS1(Reg)) { + if (!o0.as().isGpX()) + goto InvalidInstruction; + + if (!checkGpId(o0, kZR)) + goto InvalidPhysId; + + opcode.reset(opData.opcode); + opcode.addReg(o0, 5); + goto EmitOp; + } + + break; + } + + case InstDB::kEncodingBaseBranchRel: { + const InstDB::EncodingData::BaseBranchRel& opData = InstDB::EncodingData::baseBranchRel[encodingIndex]; + + if (isign4 == ENC_OPS1(Label) || isign4 == ENC_OPS1(Imm)) { + opcode.reset(opData.opcode); + rmRel = &o0; + + if (instCC != CondCode::kAL) { + opcode |= B(30); + opcode.addImm(condCodeToOpcodeCond(uint32_t(instCC)), 0); + offsetFormat.resetToImmValue(OffsetType::kSignedOffset, 4, 5, 19, 2); + goto EmitOp_Rel; + } + + offsetFormat.resetToImmValue(OffsetType::kSignedOffset, 4, 0, 26, 2); + goto EmitOp_Rel; + } + + break; + } + + case InstDB::kEncodingBaseBranchCmp: { + const InstDB::EncodingData::BaseBranchCmp& opData = InstDB::EncodingData::baseBranchCmp[encodingIndex]; + + if (isign4 == ENC_OPS2(Reg, Label) || isign4 == ENC_OPS2(Reg, Imm)) { + uint32_t x; + if (!checkGpType(o0, kWX, &x)) + goto InvalidInstruction; + + if (!checkGpId(o0, kZR)) + goto InvalidPhysId; + + opcode.reset(opData.opcode); + opcode.addImm(x, 31); + opcode.addReg(o0, 0); + offsetFormat.resetToImmValue(OffsetType::kSignedOffset, 4, 5, 19, 2); + + rmRel = &o1; + goto EmitOp_Rel; + } + + break; + } + + case InstDB::kEncodingBaseBranchTst: { + const InstDB::EncodingData::BaseBranchTst& opData = InstDB::EncodingData::baseBranchTst[encodingIndex]; + + if (isign4 == ENC_OPS3(Reg, Imm, Label) || isign4 == ENC_OPS3(Reg, Imm, Imm)) { + uint32_t x; + if (!checkGpType(o0, kWX, &x)) + goto InvalidInstruction; + + if (!checkGpId(o0, kZR)) + goto InvalidPhysId; + + uint64_t imm = o1.as().valueAs(); + + opcode.reset(opData.opcode); + if (imm >= 32) { + if (!x) + goto InvalidImmediate; + opcode.addImm(x, 31); + imm &= 0x1F; + } + + opcode.addReg(o0, 0); + opcode.addImm(imm, 19); + offsetFormat.resetToImmValue(OffsetType::kSignedOffset, 4, 5, 14, 2); + + rmRel = &o2; + goto EmitOp_Rel; + } + + break; + } + + // ------------------------------------------------------------------------ + // [Base - Load / Store] + // ------------------------------------------------------------------------ + + case InstDB::kEncodingBaseLdSt: { + const InstDB::EncodingData::BaseLdSt& opData = InstDB::EncodingData::baseLdSt[encodingIndex]; + + if (isign4 == ENC_OPS2(Reg, Mem)) { + const Mem& m = o1.as(); + rmRel = &m; + + uint32_t x; + if (!checkGpType(o0, opData.rType, &x)) + goto InvalidInstruction; + + if (!checkGpId(o0, kZR)) + goto InvalidPhysId; + + // Instructions that work with either word or dword have the unsigned + // offset shift set to 2 (word), so we set it to 3 (dword) if this is + // X version of the instruction. 
+ uint32_t xShiftMask = uint32_t(opData.uOffsetShift == 2); + uint32_t immShift = uint32_t(opData.uOffsetShift) + (x & xShiftMask); + + if (!armCheckMemBaseIndexRel(m)) + goto InvalidAddress; + + int64_t offset = m.offset(); + if (m.hasBaseReg()) { + // [Base {Offset | Index}] + if (m.hasIndex()) { + uint32_t opt = armShiftOpToLdStOptMap[m.predicate()]; + if (opt == 0xFF) + goto InvalidAddress; + + uint32_t shift = m.shift(); + uint32_t s = shift != 0; + + if (s && shift != immShift) + goto InvalidAddressScale; + + opcode.reset(uint32_t(opData.registerOp) << 21); + opcode.xorImm(x, opData.xOffset); + opcode.addImm(opt, 13); + opcode.addImm(s, 12); + opcode |= B(11); + opcode.addReg(o0, 0); + goto EmitOp_MemBaseIndex_Rn5_Rm16; + } + + // Makes it easier to work with the offset especially on 32-bit arch. + if (!Support::isInt32(offset)) + goto InvalidDisplacement; + int32_t offset32 = int32_t(offset); + + if (m.isPreOrPost()) { + if (!Support::isInt9(offset32)) + goto InvalidDisplacement; + + opcode.reset(uint32_t(opData.prePostOp) << 21); + opcode.xorImm(x, opData.xOffset); + opcode.addImm(offset32 & 0x1FF, 12); + opcode.addImm(m.isPreIndex(), 11); + opcode |= B(10); + opcode.addReg(o0, 0); + goto EmitOp_MemBase_Rn5; + } + else { + uint32_t imm12 = uint32_t(offset32) >> immShift; + + // Alternative form of LDUR/STUR and related instructions as described by AArch64 reference manual: + // + // If this instruction is not encodable with scaled unsigned offset, try unscaled signed offset. + if (!Support::isUInt12(imm12) || (imm12 << immShift) != uint32_t(offset32)) { + instId = opData.uAltInstId; + instInfo = &InstDB::_instInfoTable[instId]; + encodingIndex = instInfo->_encodingDataIndex; + goto Case_BaseLdurStur; + } + + opcode.reset(uint32_t(opData.uOffsetOp) << 22); + opcode.xorImm(x, opData.xOffset); + opcode.addImm(imm12, 10); + opcode.addReg(o0, 0); + goto EmitOp_MemBase_Rn5; + } + } + else { + if (!opData.literalOp) + goto InvalidAddress; + + opcode.reset(uint32_t(opData.literalOp) << 24); + opcode.xorImm(x, opData.xOffset); + opcode.addReg(o0, 0); + offsetFormat.resetToImmValue(OffsetType::kSignedOffset, 4, 5, 19, 2); + goto EmitOp_Rel; + } + } + + break; + } + + case InstDB::kEncodingBaseLdpStp: { + const InstDB::EncodingData::BaseLdpStp& opData = InstDB::EncodingData::baseLdpStp[encodingIndex]; + + if (isign4 == ENC_OPS3(Reg, Reg, Mem)) { + const Mem& m = o2.as(); + rmRel = &m; + + uint32_t x; + if (!checkGpType(o0, o1, opData.rType, &x)) + goto InvalidInstruction; + + if (!checkGpId(o0, o1, kZR)) + goto InvalidPhysId; + + if (m.baseType() != RegType::kARM_GpX || m.hasIndex()) + goto InvalidAddress; + + if (m.isOffset64Bit()) + goto InvalidDisplacement; + + uint32_t offsetShift = opData.offsetShift + x; + int32_t offset32 = m.offsetLo32() >> offsetShift; + + // Make sure we didn't lose bits by applying the mandatory offset shift. + if (uint32_t(offset32) << offsetShift != uint32_t(m.offsetLo32())) + goto InvalidDisplacement; + + // Offset is encoded as 7-bit immediate. 
+ if (!Support::isInt7(offset32)) + goto InvalidDisplacement; + + if (m.isPreOrPost() && offset32 != 0) { + if (!opData.prePostOp) + goto InvalidAddress; + + opcode.reset(uint32_t(opData.prePostOp) << 22); + opcode.addImm(m.isPreIndex(), 24); + } + else { + opcode.reset(uint32_t(opData.offsetOp) << 22); + } + + opcode.addImm(x, opData.xOffset); + opcode.addImm(offset32 & 0x7F, 15); + opcode.addReg(o1, 10); + opcode.addReg(o0, 0); + goto EmitOp_MemBase_Rn5; + } + + break; + } + + case InstDB::kEncodingBaseStx: { + const InstDB::EncodingData::BaseStx& opData = InstDB::EncodingData::baseStx[encodingIndex]; + + if (isign4 == ENC_OPS3(Reg, Reg, Mem)) { + const Mem& m = o2.as(); + uint32_t x; + + if (!o0.as().isGpW() || !checkGpType(o1, opData.rType, &x)) + goto InvalidInstruction; + + if (!checkGpId(o0, o1, kZR)) + goto InvalidPhysId; + + opcode.reset(opData.opcode()); + opcode.addImm(x, opData.xOffset); + opcode.addReg(o0, 16); + opcode.addReg(o1, 0); + + rmRel = &m; + goto EmitOp_MemBaseNoImm_Rn5; + } + + break; + } + + case InstDB::kEncodingBaseLdxp: { + const InstDB::EncodingData::BaseLdxp& opData = InstDB::EncodingData::baseLdxp[encodingIndex]; + + if (isign4 == ENC_OPS3(Reg, Reg, Mem)) { + const Mem& m = o2.as(); + uint32_t x; + + if (!checkGpType(o0, opData.rType, &x) || !checkSignature(o0, o1)) + goto InvalidInstruction; + + if (!checkGpId(o0, o1, kZR)) + goto InvalidPhysId; + + opcode.reset(opData.opcode()); + opcode.addImm(x, opData.xOffset); + opcode.addReg(o1, 10); + opcode.addReg(o0, 0); + + rmRel = &m; + goto EmitOp_MemBaseNoImm_Rn5; + } + + break; + } + + case InstDB::kEncodingBaseStxp: { + const InstDB::EncodingData::BaseStxp& opData = InstDB::EncodingData::baseStxp[encodingIndex]; + + if (isign4 == ENC_OPS4(Reg, Reg, Reg, Mem)) { + const Mem& m = o3.as(); + uint32_t x; + + if (!o0.as().isGpW() || !checkGpType(o1, opData.rType, &x) || !checkSignature(o1, o2)) + goto InvalidInstruction; + + if (!checkGpId(o0, o1, o2, kZR)) + goto InvalidPhysId; + + opcode.reset(opData.opcode()); + opcode.addImm(x, opData.xOffset); + opcode.addReg(o0, 16); + opcode.addReg(o2, 10); + opcode.addReg(o1, 0); + + rmRel = &m; + goto EmitOp_MemBaseNoImm_Rn5; + } + + break; + } + + case InstDB::kEncodingBaseRM_NoImm: { + const InstDB::EncodingData::BaseRM_NoImm& opData = InstDB::EncodingData::baseRM_NoImm[encodingIndex]; + + if (isign4 == ENC_OPS2(Reg, Mem)) { + const Mem& m = o1.as(); + rmRel = &m; + + uint32_t x; + if (!checkGpType(o0, opData.rType, &x)) + goto InvalidInstruction; + + if (!checkGpId(o0, opData.rHiId)) + goto InvalidPhysId; + + opcode.reset(opData.opcode()); + opcode.addImm(x, opData.xOffset); + opcode.addReg(o0, 0); + goto EmitOp_MemBaseNoImm_Rn5; + } + + break; + } + + case InstDB::kEncodingBaseRM_SImm9: { +Case_BaseLdurStur: + const InstDB::EncodingData::BaseRM_SImm9& opData = InstDB::EncodingData::baseRM_SImm9[encodingIndex]; + + if (isign4 == ENC_OPS2(Reg, Mem)) { + const Mem& m = o1.as(); + rmRel = &m; + + uint32_t x; + if (!checkGpType(o0, opData.rType, &x)) + goto InvalidInstruction; + + if (!checkGpId(o0, opData.rHiId)) + goto InvalidPhysId; + + if (m.hasBaseReg() && !m.hasIndex()) { + if (m.isOffset64Bit()) + goto InvalidDisplacement; + + int32_t offset32 = m.offsetLo32() >> opData.immShift; + if (Support::shl(offset32, opData.immShift) != m.offsetLo32()) + goto InvalidDisplacement; + + if (!Support::isInt9(offset32)) + goto InvalidDisplacement; + + if (m.isFixedOffset()) { + opcode.reset(opData.offsetOp()); + } + else { + if (!opData.prePostOp()) + goto InvalidInstruction; + + 
opcode.reset(opData.prePostOp()); + opcode.xorImm(m.isPreIndex(), 11); + } + + opcode.xorImm(x, opData.xOffset); + opcode.addImm(offset32 & 0x1FF, 12); + opcode.addReg(o0, 0); + goto EmitOp_MemBase_Rn5; + } + + goto InvalidAddress; + } + + break; + } + + case InstDB::kEncodingBaseRM_SImm10: { + const InstDB::EncodingData::BaseRM_SImm10& opData = InstDB::EncodingData::baseRM_SImm10[encodingIndex]; + + if (isign4 == ENC_OPS2(Reg, Mem)) { + const Mem& m = o1.as(); + rmRel = &m; + + uint32_t x; + if (!checkGpType(o0, opData.rType, &x)) + goto InvalidInstruction; + + if (!checkGpId(o0, opData.rHiId)) + goto InvalidPhysId; + + if (m.hasBaseReg() && !m.hasIndex()) { + if (m.isOffset64Bit()) + goto InvalidDisplacement; + + int32_t offset32 = m.offsetLo32() >> opData.immShift; + if (Support::shl(offset32, opData.immShift) != m.offsetLo32()) + goto InvalidDisplacement; + + if (!Support::isInt10(offset32)) + goto InvalidDisplacement; + + if (m.isPostIndex()) + goto InvalidAddress; + + // Offset has 10 bits, sign is stored in the 10th bit. + offset32 &= 0x3FF; + + opcode.reset(opData.opcode()); + opcode.xorImm(m.isPreIndex(), 11); + opcode.xorImm(x, opData.xOffset); + opcode.addImm(offset32 >> 9, 22); + opcode.addImm(offset32, 12); + opcode.addReg(o0, 0); + goto EmitOp_MemBase_Rn5; + } + + goto InvalidAddress; + } + + break; + } + + case InstDB::kEncodingBaseAtomicOp: { + const InstDB::EncodingData::BaseAtomicOp& opData = InstDB::EncodingData::baseAtomicOp[encodingIndex]; + + if (isign4 == ENC_OPS3(Reg, Reg, Mem)) { + const Mem& m = o2.as(); + uint32_t x; + + if (!checkGpType(o0, opData.rType, &x) || !checkSignature(o0, o1)) + goto InvalidInstruction; + + if (!checkGpId(o0, o1, kZR)) + goto InvalidInstruction; + + opcode.reset(opData.opcode()); + opcode.addImm(x, opData.xOffset); + opcode.addReg(o0, 16); + opcode.addReg(o1, 0); + + rmRel = &m; + goto EmitOp_MemBaseNoImm_Rn5; + } + + break; + } + + case InstDB::kEncodingBaseAtomicSt: { + const InstDB::EncodingData::BaseAtomicSt& opData = InstDB::EncodingData::baseAtomicSt[encodingIndex]; + + if (isign4 == ENC_OPS2(Reg, Mem)) { + const Mem& m = o1.as(); + uint32_t x; + + if (!checkGpType(o0, opData.rType, &x)) + goto InvalidInstruction; + + if (!checkGpId(o0, kZR)) + goto InvalidPhysId; + + opcode.reset(opData.opcode()); + opcode.addImm(x, opData.xOffset); + opcode.addReg(o0, 16); + opcode.addReg(Gp::kIdZr, 0); + + rmRel = &m; + goto EmitOp_MemBaseNoImm_Rn5; + } + + break; + } + + case InstDB::kEncodingBaseAtomicCasp: { + const InstDB::EncodingData::BaseAtomicCasp& opData = InstDB::EncodingData::baseAtomicCasp[encodingIndex]; + const Operand_& o4 = opExt[EmitterUtils::kOp4]; + + if (isign4 == ENC_OPS4(Reg, Reg, Reg, Reg) && o4.isMem()) { + const Mem& m = o4.as(); + uint32_t x; + + if (!checkGpType(o0, opData.rType, &x)) + goto InvalidInstruction; + + if (!checkSignature(o0, o1, o2, o3)) + goto InvalidInstruction; + + if (!checkEven(o0, o2) || !checkGpId(o0, o2, kZR)) + goto InvalidPhysId; + + if (!checkConsecutive(o0, o1) || !checkConsecutive(o2, o3)) + goto InvalidPhysId; + + opcode.reset(opData.opcode()); + opcode.addImm(x, opData.xOffset); + opcode.addReg(o0, 16); + opcode.addReg(o2, 0); + + rmRel = &m; + goto EmitOp_MemBaseNoImm_Rn5; + } + + break; + } + + // ------------------------------------------------------------------------ + // [FSimd - Instructions] + // ------------------------------------------------------------------------ + + case InstDB::kEncodingFSimdSV: { + const InstDB::EncodingData::FSimdSV& opData = 
InstDB::EncodingData::fSimdSV[encodingIndex]; + + if (isign4 == ENC_OPS2(Reg, Reg)) { + uint32_t q = diff(o1.as().type(), RegType::kARM_VecD); + if (q > 1) + goto InvalidInstruction; + + if (o0.as().hasElementType()) + goto InvalidInstruction; + + // This operation is only defined for: + // hD, vS.{4|8}h (16-bit) + // sD, vS.4s (32-bit) + uint32_t sz = diff(o0.as().type(), RegType::kARM_VecH); + uint32_t elementSz = o1.as().elementType() - Vec::kElementTypeH; + + // Size greater than 1 means 64-bit elements, not supported. + if ((sz | elementSz) > 1 || sz != elementSz) + goto InvalidInstruction; + + // Size 1 (32-bit float) requires at least 4 elements. + if (sz && !q) + goto InvalidInstruction; + + // Bit flipping according to sz. + static const uint32_t szBits[] = { B(29), 0 }; + + opcode.reset(opData.opcode << 10); + opcode ^= szBits[sz]; + opcode.addImm(q, 30); + goto EmitOp_Rd0_Rn5; + } + + break; + } + + case InstDB::kEncodingFSimdVV: { + const InstDB::EncodingData::FSimdVV& opData = InstDB::EncodingData::fSimdVV[encodingIndex]; + + if (isign4 == ENC_OPS2(Reg, Reg)) { + if (!matchSignature(o0, o1, instFlags)) + goto InvalidInstruction; + + if (!pickFpOpcode(o0.as(), opData.scalarOp(), opData.scalarHf(), opData.vectorOp(), opData.vectorHf(), &opcode)) + goto InvalidInstruction; + + goto EmitOp_Rd0_Rn5; + } + + break; + } + + case InstDB::kEncodingFSimdVVV: { + const InstDB::EncodingData::FSimdVVV& opData = InstDB::EncodingData::fSimdVVV[encodingIndex]; + + if (isign4 == ENC_OPS3(Reg, Reg, Reg)) { + if (!matchSignature(o0, o1, o2, instFlags)) + goto InvalidInstruction; + + if (!pickFpOpcode(o0.as(), opData.scalarOp(), opData.scalarHf(), opData.vectorOp(), opData.vectorHf(), &opcode)) + goto InvalidInstruction; + + goto EmitOp_Rd0_Rn5_Rm16; + } + + break; + } + + case InstDB::kEncodingFSimdVVVe: { + const InstDB::EncodingData::FSimdVVVe& opData = InstDB::EncodingData::fSimdVVVe[encodingIndex]; + + if (isign4 == ENC_OPS3(Reg, Reg, Reg)) { + if (!o2.as().hasElementIndex()) { + if (!matchSignature(o0, o1, o2, instFlags)) + goto InvalidInstruction; + + if (!pickFpOpcode(o0.as(), opData.scalarOp(), opData.scalarHf(), opData.vectorOp(), opData.vectorHf(), &opcode)) + goto InvalidInstruction; + + goto EmitOp_Rd0_Rn5_Rm16; + } + else { + if (!matchSignature(o0, o1, instFlags)) + goto InvalidInstruction; + + uint32_t q = o1.as().isVecQ(); + uint32_t sz; + + if (!pickFpOpcode(o0.as(), opData.elementScalarOp(), InstDB::kHF_D, opData.elementVectorOp(), InstDB::kHF_D, &opcode, &sz)) + goto InvalidInstruction; + + if (sz == 0 && o2.as().id() > 15) + goto InvalidPhysId; + + uint32_t elementIndex = o2.as().elementIndex(); + if (elementIndex > (7u >> sz)) + goto InvalidElementIndex; + + uint32_t hlm = elementIndex << sz; + opcode.addImm(q, 30); + opcode.addImm(hlm & 3u, 20); + opcode.addImm(hlm >> 2, 11); + goto EmitOp_Rd0_Rn5_Rm16; + } + } + + break; + } + + case InstDB::kEncodingFSimdVVVV: { + const InstDB::EncodingData::FSimdVVVV& opData = InstDB::EncodingData::fSimdVVVV[encodingIndex]; + + if (isign4 == ENC_OPS4(Reg, Reg, Reg, Reg)) { + if (!matchSignature(o0, o1, o2, o3, instFlags)) + goto InvalidInstruction; + + if (!pickFpOpcode(o0.as(), opData.scalarOp(), opData.scalarHf(), opData.vectorOp(), opData.vectorHf(), &opcode)) + goto InvalidInstruction; + + goto EmitOp_Rd0_Rn5_Rm16_Ra10; + } + + break; + } + + case InstDB::kEncodingSimdFcadd: { + const InstDB::EncodingData::SimdFcadd& opData = InstDB::EncodingData::simdFcadd[encodingIndex]; + + if (isign4 == ENC_OPS4(Reg, Reg, Reg, Imm)) { + if 
(!checkSignature(o0, o1, o2) || o0.as().hasElementIndex()) + goto InvalidInstruction; + + uint32_t q = diff(o0.as().type(), RegType::kARM_VecD); + if (q > 1) + goto InvalidInstruction; + + uint32_t sz = o0.as().elementType() - Vec::kElementTypeB; + if (sz == 0 || sz > 3) + goto InvalidInstruction; + + // 0 <- 90deg. + // 1 <- 270deg. + uint32_t rot = 0; + if (o3.as().value() == 270) + rot = 1; + else if (o3.as().value() != 90) + goto InvalidImmediate; + + opcode.reset(opData.opcode()); + opcode.addImm(q, 30); + opcode.addImm(sz, 22); + opcode.addImm(rot, 12); + goto EmitOp_Rd0_Rn5_Rm16; + } + + break; + } + + case InstDB::kEncodingSimdFccmpFccmpe: { + const InstDB::EncodingData::SimdFccmpFccmpe& opData = InstDB::EncodingData::simdFccmpFccmpe[encodingIndex]; + + if (isign4 == ENC_OPS4(Reg, Reg, Imm, Imm)) { + uint32_t sz = diff(o0.as().type(), RegType::kARM_VecH); + if (sz > 2) + goto InvalidInstruction; + + if (!checkSignature(o0, o1) || o0.as().hasElementType()) + goto InvalidInstruction; + + uint64_t nzcv = o2.as().valueAs(); + uint64_t cond = o3.as().valueAs(); + + if ((nzcv | cond) > 0xFu) + goto InvalidImmediate; + + uint32_t type = (sz - 1) & 0x3u; + + opcode.reset(opData.opcode()); + opcode.addImm(type, 22); + opcode.addImm(condCodeToOpcodeCond(uint32_t(cond)), 12); + opcode.addImm(nzcv, 0); + + goto EmitOp_Rn5_Rm16; + } + + break; + } + + case InstDB::kEncodingSimdFcm: { + const InstDB::EncodingData::SimdFcm& opData = InstDB::EncodingData::simdFcm[encodingIndex]; + + if (isign4 == ENC_OPS3(Reg, Reg, Reg) && opData.hasRegisterOp()) { + if (!matchSignature(o0, o1, o2, instFlags)) + goto InvalidInstruction; + + if (!pickFpOpcode(o0.as(), opData.registerScalarOp(), opData.registerScalarHf(), opData.registerVectorOp(), opData.registerVectorHf(), &opcode)) + goto InvalidInstruction; + + goto EmitOp_Rd0_Rn5_Rm16; + } + + if (isign4 == ENC_OPS3(Reg, Reg, Imm) && opData.hasZeroOp()) { + if (!checkSignature(o0, o1)) + goto InvalidInstruction; + + if (o2.as().value() != 0 || o2.as().predicate() != 0) + goto InvalidImmediate; + + if (!pickFpOpcode(o0.as(), opData.zeroScalarOp(), InstDB::kHF_B, opData.zeroVectorOp(), InstDB::kHF_B, &opcode)) + goto InvalidInstruction; + + goto EmitOp_Rd0_Rn5; + } + + break; + } + + case InstDB::kEncodingSimdFcmla: { + const InstDB::EncodingData::SimdFcmla& opData = InstDB::EncodingData::simdFcmla[encodingIndex]; + + if (isign4 == ENC_OPS4(Reg, Reg, Reg, Imm)) { + if (!checkSignature(o0, o1)) + goto InvalidInstruction; + + uint32_t q = diff(o0.as().type(), RegType::kARM_VecD); + if (q > 1) + goto InvalidInstruction; + + uint32_t sz = o0.as().elementType() - Vec::kElementTypeB; + if (sz == 0 || sz > 3) + goto InvalidInstruction; + + uint32_t rot = 0; + switch (o3.as().value()) { + case 0 : rot = 0; break; + case 90 : rot = 1; break; + case 180: rot = 2; break; + case 270: rot = 3; break; + default: + goto InvalidImmediate; + } + + if (!o2.as().hasElementIndex()) { + if (!checkSignature(o1, o2)) + goto InvalidInstruction; + + opcode.reset(opData.regularOp()); + opcode.addImm(q, 30); + opcode.addImm(sz, 22); + opcode.addImm(rot, 11); + goto EmitOp_Rd0_Rn5_Rm16; + } + else { + if (o0.as().elementType() != o2.as().elementType()) + goto InvalidInstruction; + + // Only allowed vectors are: 4H, 8H, and 4S. + if (!(sz == 1 || (q == 1 && sz == 2))) + goto InvalidInstruction; + + // Element index ranges: + // 4H - ElementIndex[0..1] (index 2..3 is UNDEFINED). + // 8H - ElementIndex[0..3]. + // 4S - ElementIndex[0..1]. 
+ uint32_t elementIndex = o2.as().elementIndex(); + uint32_t hlFieldShift = sz == 1 ? 0u : 1u; + uint32_t maxElementIndex = q == 1 && sz == 1 ? 3u : 1u; + + if (elementIndex > maxElementIndex) + goto InvalidElementIndex; + + uint32_t hl = elementIndex << hlFieldShift; + + opcode.reset(opData.elementOp()); + opcode.addImm(q, 30); + opcode.addImm(sz, 22); + opcode.addImm(hl & 1u, 21); // L field. + opcode.addImm(hl >> 1, 11); // H field. + opcode.addImm(rot, 13); + goto EmitOp_Rd0_Rn5_Rm16; + } + } + + break; + } + + case InstDB::kEncodingSimdFcmpFcmpe: { + const InstDB::EncodingData::SimdFcmpFcmpe& opData = InstDB::EncodingData::simdFcmpFcmpe[encodingIndex]; + + uint32_t sz = diff(o0.as().type(), RegType::kARM_VecH); + uint32_t type = (sz - 1) & 0x3u; + + if (sz > 2) + goto InvalidInstruction; + + if (o0.as().hasElementType()) + goto InvalidInstruction; + + opcode.reset(opData.opcode()); + opcode.addImm(type, 22); + + if (isign4 == ENC_OPS2(Reg, Reg)) { + if (!checkSignature(o0, o1)) + goto InvalidInstruction; + + goto EmitOp_Rn5_Rm16; + } + + if (isign4 == ENC_OPS2(Reg, Imm)) { + if (o1.as().value() != 0 || o1.as().predicate() != 0) + goto InvalidInstruction; + + opcode |= B(3); + goto EmitOp_Rn5; + } + + break; + } + + case InstDB::kEncodingSimdFcsel: { + if (isign4 == ENC_OPS4(Reg, Reg, Reg, Imm)) { + if (!checkSignature(o0, o1, o2)) + goto InvalidInstruction; + + uint32_t sz = diff(o0.as().type(), RegType::kARM_VecH); + uint32_t type = (sz - 1) & 0x3u; + + if (sz > 2 || o0.as().hasElementType()) + goto InvalidInstruction; + + uint64_t cond = o3.as().valueAs(); + if (cond > 0xFu) + goto InvalidImmediate; + + opcode.reset(0b00011110001000000000110000000000); + opcode.addImm(type, 22); + opcode.addImm(condCodeToOpcodeCond(uint32_t(cond)), 12); + goto EmitOp_Rd0_Rn5_Rm16; + } + + break; + } + + case InstDB::kEncodingSimdFcvt: { + if (isign4 == ENC_OPS2(Reg, Reg)) { + uint32_t dstSz = diff(o0.as().type(), RegType::kARM_VecH); + uint32_t srcSz = diff(o1.as().type(), RegType::kARM_VecH); + + if ((dstSz | srcSz) > 3) + goto InvalidInstruction; + + if (o0.as().hasElementType() || o1.as().hasElementType()) + goto InvalidInstruction; + + // Table that provides 'type' and 'opc' according to the dst/src combination. + static const uint8_t table[] = { + 0xFFu, // H <- H (Invalid). + 0x03u, // H <- S (type=00 opc=11). + 0x13u, // H <- D (type=01 opc=11). + 0xFFu, // H <- Q (Invalid). + 0x30u, // S <- H (type=11 opc=00). + 0xFFu, // S <- S (Invalid). + 0x10u, // S <- D (type=01 opc=00). + 0xFFu, // S <- Q (Invalid). + 0x31u, // D <- H (type=11 opc=01). + 0x01u, // D <- S (type=00 opc=01). + 0xFFu, // D <- D (Invalid). + 0xFFu, // D <- Q (Invalid). + 0xFFu, // Q <- H (Invalid). + 0xFFu, // Q <- S (Invalid). + 0xFFu, // Q <- D (Invalid). + 0xFFu // Q <- Q (Invalid). + }; + + uint32_t typeOpc = table[(dstSz << 2) | srcSz]; + opcode.reset(0b0001111000100010010000 << 10); + opcode.addImm(typeOpc >> 4, 22); + opcode.addImm(typeOpc & 15, 15); + goto EmitOp_Rd0_Rn5; + } + + break; + } + + case InstDB::kEncodingSimdFcvtLN: { + const InstDB::EncodingData::SimdFcvtLN& opData = InstDB::EncodingData::simdFcvtLN[encodingIndex]; + + if (isign4 == ENC_OPS2(Reg, Reg)) { + // Scalar form - only FCVTXN. + if (o0.as().isVecS() && o1.as().isVecD()) { + if (!opData.hasScalar()) + goto InvalidInstruction; + + if (o0.as().hasElementType() || o1.as().hasElementType()) + goto InvalidInstruction; + + opcode.reset(opData.scalarOp()); + opcode |= B(22); // sz bit must be 1, the only supported combination of FCVTXN. 
+          goto EmitOp_Rd0_Rn5;
+        }
+
+        opcode.reset(opData.vectorOp());
+
+        const Vec& rL = (instFlags & InstDB::kInstFlagLong) ? o0.as() : o1.as();
+        const Vec& rN = (instFlags & InstDB::kInstFlagLong) ? o1.as() : o0.as();
+
+        uint32_t q = diff(rN.type(), RegType::kARM_VecD);
+        if (uint32_t(opcode.hasQ()) != q)
+          goto InvalidInstruction;
+
+        if (rL.isVecS4() && rN.elementType() == Vec::kElementTypeH && !opData.isCvtxn()) {
+          goto EmitOp_Rd0_Rn5;
+        }
+
+        if (rL.isVecD2() && rN.elementType() == Vec::kElementTypeS) {
+          opcode |= B(22);
+          goto EmitOp_Rd0_Rn5;
+        }
+      }
+
+      break;
+    }
+
+    case InstDB::kEncodingSimdFcvtSV: {
+      const InstDB::EncodingData::SimdFcvtSV& opData = InstDB::EncodingData::simdFcvtSV[encodingIndex];
+
+      // So we can support both IntToFloat and FloatToInt conversions.
+      const Operand_& oGp = opData.isFloatToInt() ? o0 : o1;
+      const Operand_& oVec = opData.isFloatToInt() ? o1 : o0;
+
+      if (isign4 == ENC_OPS2(Reg, Reg)) {
+        if (oGp.as().isGp() && oVec.as().isVec()) {
+          uint32_t x = oGp.as().isGpX();
+          uint32_t type = diff(oVec.as().type(), RegType::kARM_VecH);
+
+          if (type > 2u)
+            goto InvalidInstruction;
+
+          type = (type - 1u) & 0x3;
+          opcode.reset(opData.generalOp());
+          opcode.addImm(type, 22);
+          opcode.addImm(x, 31);
+          goto EmitOp_Rd0_Rn5;
+        }
+
+        if (o0.as().isVec() && o1.as().isVec()) {
+          if (!checkSignature(o0, o1))
+            goto InvalidInstruction;
+
+          if (!pickFpOpcode(o0.as(), opData.scalarIntOp(), InstDB::kHF_B, opData.vectorIntOp(), InstDB::kHF_B, &opcode))
+            goto InvalidInstruction;
+
+          goto EmitOp_Rd0_Rn5;
+        }
+      }
+
+      if (isign4 == ENC_OPS3(Reg, Reg, Imm) && opData.isFixedPoint()) {
+        if (o2.as().valueAs() >= 64)
+          goto InvalidInstruction;
+
+        uint32_t scale = o2.as().valueAs();
+        if (scale == 0)
+          goto InvalidInstruction;
+
+        if (oGp.as().isGp() && oVec.as().isVec()) {
+          uint32_t x = oGp.as().isGpX();
+          uint32_t type = diff(oVec.as().type(), RegType::kARM_VecH);
+
+          uint32_t scaleLimit = 32u << x;
+          if (scale > scaleLimit)
+            goto InvalidInstruction;
+
+          type = (type - 1u) & 0x3;
+          opcode.reset(opData.generalOp() ^ B(21));
+          opcode.addImm(type, 22);
+          opcode.addImm(x, 31);
+          opcode.addImm(64u - scale, 10);
+          goto EmitOp_Rd0_Rn5;
+        }
+
+        if (o0.as().isVec() && o1.as().isVec()) {
+          if (!checkSignature(o0, o1))
+            goto InvalidInstruction;
+
+          uint32_t sz;
+          if (!pickFpOpcode(o0.as(), opData.scalarFpOp(), InstDB::kHF_0, opData.vectorFpOp(), InstDB::kHF_0, &opcode, &sz))
+            goto InvalidInstruction;
+
+          uint32_t scaleLimit = 16u << sz;
+          if (scale > scaleLimit)
+            goto InvalidInstruction;
+
+          uint32_t imm = Support::neg(scale) & Support::lsbMask(sz + 4 + 1);
+          opcode.addImm(imm, 16);
+          goto EmitOp_Rd0_Rn5;
+        }
+      }
+
+      break;
+    }
+
+    case InstDB::kEncodingSimdFmlal: {
+      const InstDB::EncodingData::SimdFmlal& opData = InstDB::EncodingData::simdFmlal[encodingIndex];
+
+      if (isign4 == ENC_OPS3(Reg, Reg, Reg)) {
+        uint32_t q = diff(o0.as().type(), RegType::kARM_VecD);
+        uint32_t qIsOptional = opData.optionalQ();
+
+        if (qIsOptional) {
+          // This instruction works with either 64-bit or 128-bit registers,
+          // encoded by Q bit.
+          if (q > 1)
+            goto InvalidInstruction;
+        }
+        else {
+          // This instruction requires 128-bit vector registers.
+          if (q != 1)
+            goto InvalidInstruction;
+
+          // The instruction is either B (bottom) or T (top), which is part of
+          // the opcode, which uses the Q bit, so we have to clear it explicitly.
+ q = 0; + } + + if (uint32_t(o0.as().type()) != uint32_t(o1.as().type()) + qIsOptional || + o0.as().elementType() != opData.tA || + o1.as().elementType() != opData.tB) + goto InvalidInstruction; + + if (!o2.as().hasElementIndex()) { + if (!checkSignature(o1, o2)) + goto InvalidInstruction; + + opcode.reset(opData.vectorOp()); + opcode.addImm(q, 30); + goto EmitOp_Rd0_Rn5_Rm16; + } + else { + if (o2.as().elementType() != opData.tElement) + goto InvalidInstruction; + + if (o2.as().id() > 15) + goto InvalidPhysId; + + uint32_t elementIndex = o2.as().elementIndex(); + if (elementIndex > 7u) + goto InvalidElementIndex; + + opcode.reset(opData.elementOp()); + opcode.addImm(q, 30); + opcode.addImm(elementIndex & 3u, 20); + opcode.addImm(elementIndex >> 2, 11); + goto EmitOp_Rd0_Rn5_Rm16; + } + } + + break; + } + + case InstDB::kEncodingSimdFmov: { + if (isign4 == ENC_OPS2(Reg, Reg)) { + // FMOV Gp <-> Vec opcode: + opcode.reset(0b00011110001001100000000000000000); + + if (o0.as().isGp() && o1.as().isVec()) { + // FMOV Wd, Hn (sf=0 type=11 rmode=00 op=110) + // FMOV Xd, Hn (sf=1 type=11 rmode=00 op=110) + // FMOV Wd, Sn (sf=0 type=00 rmode=00 op=110) + // FMOV Xd, Dn (sf=1 type=11 rmode=00 op=110) + // FMOV Xd, Vn.d[1] (sf=1 type=10 rmode=01 op=110) + uint32_t x = o0.as().isGpX(); + uint32_t sz = diff(o1.as().type(), RegType::kARM_VecH); + + uint32_t type = (sz - 1) & 0x3u; + uint32_t rModeOp = 0b00110; + + if (o1.as().hasElementIndex()) { + // Special case. + if (!x || !o1.as().isVecD2() || o1.as().elementIndex() != 1) + goto InvalidInstruction; + type = 0b10; + rModeOp = 0b01110; + } + else { + // Must be scalar. + if (sz > 2) + goto InvalidInstruction; + + if (o1.as().hasElementType()) + goto InvalidInstruction; + + if (o1.as().isVecS() && x) + goto InvalidInstruction; + + if (o1.as().isVecD() && !x) + goto InvalidInstruction; + } + + opcode.addImm(x, 31); + opcode.addImm(type, 22); + opcode.addImm(rModeOp, 16); + goto EmitOp_Rd0_Rn5; + } + + if (o0.as().isVec() && o1.as().isGp()) { + // FMOV Hd, Wn (sf=0 type=11 rmode=00 op=111) + // FMOV Hd, Xn (sf=1 type=11 rmode=00 op=111) + // FMOV Sd, Wn (sf=0 type=00 rmode=00 op=111) + // FMOV Dd, Xn (sf=1 type=11 rmode=00 op=111) + // FMOV Vd.d[1], Xn (sf=1 type=10 rmode=01 op=111) + uint32_t x = o1.as().isGpX(); + uint32_t sz = diff(o0.as().type(), RegType::kARM_VecH); + + uint32_t type = (sz - 1) & 0x3u; + uint32_t rModeOp = 0b00111; + + if (o0.as().hasElementIndex()) { + // Special case. + if (!x || !o0.as().isVecD2() || o0.as().elementIndex() != 1) + goto InvalidInstruction; + type = 0b10; + rModeOp = 0b01111; + } + else { + // Must be scalar. 
+ if (sz > 2) + goto InvalidInstruction; + + if (o0.as().hasElementType()) + goto InvalidInstruction; + + if (o0.as().isVecS() && x) + goto InvalidInstruction; + + if (o0.as().isVecD() && !x) + goto InvalidInstruction; + } + + opcode.addImm(x, 31); + opcode.addImm(type, 22); + opcode.addImm(rModeOp, 16); + goto EmitOp_Rd0_Rn5; + } + + if (checkSignature(o0, o1)) { + uint32_t sz = diff(o0.as().type(), RegType::kARM_VecH); + if (sz > 2) + goto InvalidInstruction; + + if (o0.as().hasElementType()) + goto InvalidInstruction; + + uint32_t type = (sz - 1) & 0x3; + opcode.reset(0b00011110001000000100000000000000); + opcode.addImm(type, 22); + goto EmitOp_Rd0_Rn5; + } + } + + if (isign4 == ENC_OPS2(Reg, Imm)) { + if (o0.as().isVec()) { + double fpValue; + if (o1.as().isDouble()) + fpValue = o1.as().valueAs(); + else if (o1.as().isInt32()) + fpValue = o1.as().valueAs(); + else + goto InvalidImmediate; + + if (!Utils::isFP64Imm8(fpValue)) + goto InvalidImmediate; + + uint32_t imm8 = Utils::encodeFP64ToImm8(fpValue); + if (!o0.as().hasElementType()) { + // FMOV (scalar, immediate). + uint32_t sz = diff(o0.as().type(), RegType::kARM_VecH); + uint32_t type = (sz - 1u) & 0x3u; + + if (sz > 2) + goto InvalidInstruction; + + opcode.reset(0b00011110001000000001000000000000); + opcode.addImm(type, 22); + opcode.addImm(imm8, 13); + goto EmitOp_Rd0; + } + else { + uint32_t q = diff(o0.as().type(), RegType::kARM_VecD); + uint32_t sz = o0.as().elementType() - Vec::kElementTypeH; + + if (q > 1 || sz > 2) + goto InvalidInstruction; + + static const uint32_t szBits[3] = { B(11), B(0), B(29) }; + opcode.reset(0b00001111000000001111010000000000); + opcode ^= szBits[sz]; + opcode.addImm(q, 30); + opcode.addImm(imm8 >> 5, 16); + opcode.addImm(imm8 & 31, 5); + goto EmitOp_Rd0; + } + } + } + + break; + } + + case InstDB::kEncodingFSimdPair: { + const InstDB::EncodingData::FSimdPair& opData = InstDB::EncodingData::fSimdPair[encodingIndex]; + + if (isign4 == ENC_OPS2(Reg, Reg)) { + // This operation is only defined for: + // hD, vS.2h (16-bit) + // sD, vS.2s (32-bit) + // dD, vS.2d (64-bit) + uint32_t sz = diff(o0.as().type(), RegType::kARM_VecH); + if (sz > 2) + goto InvalidInstruction; + + static const uint32_t szSignatures[3] = { + VecS::kSignature | (Vec::kSignatureElementH), + VecD::kSignature | (Vec::kSignatureElementS), + VecV::kSignature | (Vec::kSignatureElementD) + }; + + if (o1.signature() != szSignatures[sz]) + goto InvalidInstruction; + + static const uint32_t szBits[] = { B(29), 0, B(22) }; + opcode.reset(opData.scalarOp()); + opcode ^= szBits[sz]; + goto EmitOp_Rd0_Rn5; + } + + if (isign4 == ENC_OPS3(Reg, Reg, Reg)) { + if (!checkSignature(o0, o1, o2)) + goto InvalidInstruction; + + uint32_t q = diff(o0.as().type(), RegType::kARM_VecD); + if (q > 1) + goto InvalidInstruction; + + uint32_t sz = o0.as().elementType() - Vec::kElementTypeH; + if (sz > 2) + goto InvalidInstruction; + + static const uint32_t szBits[3] = { B(22) | B(21) | B(15) | B(14), 0, B(22) }; + opcode.reset(opData.vectorOp()); + opcode ^= szBits[sz]; + opcode.addImm(q, 30); + goto EmitOp_Rd0_Rn5_Rm16; + } + + break; + } + + // ------------------------------------------------------------------------ + // [ISimd - Instructions] + // ------------------------------------------------------------------------ + + case InstDB::kEncodingISimdSV: { + const InstDB::EncodingData::ISimdSV& opData = InstDB::EncodingData::iSimdSV[encodingIndex]; + + if (isign4 == ENC_OPS2(Reg, Reg)) { + // The first destination operand is scalar, which matches element-type 
of source vectors. + uint32_t L = (instFlags & InstDB::kInstFlagLong) != 0; + if (diff(o0.as().type(), RegType::kARM_VecB) != o1.as().elementType() - Vec::kElementTypeB + L) + goto InvalidInstruction; + + SizeOp sizeOp = armElementTypeToSizeOp(opData.vecOpType, o1.as().type(), o1.as().elementType()); + if (!sizeOp.isValid()) + goto InvalidInstruction; + + opcode.reset(opData.opcode()); + opcode.addImm(sizeOp.q(), 30); + opcode.addImm(sizeOp.size(), 22); + goto EmitOp_Rd0_Rn5; + } + + break; + } + + case InstDB::kEncodingISimdVV: { + const InstDB::EncodingData::ISimdVV& opData = InstDB::EncodingData::iSimdVV[encodingIndex]; + + if (isign4 == ENC_OPS2(Reg, Reg)) { + const Operand_& sop = significantSimdOp(o0, o1, instFlags); + if (!matchSignature(o0, o1, instFlags)) + goto InvalidInstruction; + + SizeOp sizeOp = armElementTypeToSizeOp(opData.vecOpType, sop.as().type(), sop.as().elementType()); + if (!sizeOp.isValid()) + goto InvalidInstruction; + + opcode.reset(opData.opcode()); + opcode.addImm(sizeOp.qs(), 30); + opcode.addImm(sizeOp.scalar(), 28); + opcode.addImm(sizeOp.size(), 22); + goto EmitOp_Rd0_Rn5; + } + + break; + } + + case InstDB::kEncodingISimdVVx: { + const InstDB::EncodingData::ISimdVVx& opData = InstDB::EncodingData::iSimdVVx[encodingIndex]; + + if (isign4 == ENC_OPS2(Reg, Reg)) { + if (o0.signature() != opData.op0Signature || + o1.signature() != opData.op1Signature) + goto InvalidInstruction; + + opcode.reset(opData.opcode()); + goto EmitOp_Rd0_Rn5; + } + + break; + } + + case InstDB::kEncodingISimdVVV: { + const InstDB::EncodingData::ISimdVVV& opData = InstDB::EncodingData::iSimdVVV[encodingIndex]; + + if (isign4 == ENC_OPS3(Reg, Reg, Reg)) { + const Operand_& sop = significantSimdOp(o0, o1, instFlags); + if (!matchSignature(o0, o1, o2, instFlags)) + goto InvalidInstruction; + + SizeOp sizeOp = armElementTypeToSizeOp(opData.vecOpType, sop.as().type(), sop.as().elementType()); + if (!sizeOp.isValid()) + goto InvalidInstruction; + + opcode.reset(opData.opcode()); + opcode.addImm(sizeOp.qs(), 30); + opcode.addImm(sizeOp.scalar(), 28); + opcode.addImm(sizeOp.size(), 22); + goto EmitOp_Rd0_Rn5_Rm16; + } + + break; + } + + case InstDB::kEncodingISimdVVVx: { + const InstDB::EncodingData::ISimdVVVx& opData = InstDB::EncodingData::iSimdVVVx[encodingIndex]; + + if (isign4 == ENC_OPS3(Reg, Reg, Reg)) { + if (o0.signature() != opData.op0Signature || + o1.signature() != opData.op1Signature || + o2.signature() != opData.op2Signature) + goto InvalidInstruction; + + opcode.reset(opData.opcode()); + goto EmitOp_Rd0_Rn5_Rm16; + } + + break; + } + + case InstDB::kEncodingISimdWWV: { + // Special case for wide add/sub [s|b][add|sub][w]{2}. 
+ const InstDB::EncodingData::ISimdWWV& opData = InstDB::EncodingData::iSimdWWV[encodingIndex]; + + if (isign4 == ENC_OPS3(Reg, Reg, Reg)) { + SizeOp sizeOp = armElementTypeToSizeOp(opData.vecOpType, o2.as().type(), o2.as().elementType()); + if (!sizeOp.isValid()) + goto InvalidInstruction; + + if (!checkSignature(o0, o1) || !o0.as().isVecV() || o0.as().elementType() != o2.as().elementType() + 1) + goto InvalidInstruction; + + opcode.reset(opData.opcode()); + opcode.addImm(sizeOp.qs(), 30); + opcode.addImm(sizeOp.scalar(), 28); + opcode.addImm(sizeOp.size(), 22); + goto EmitOp_Rd0_Rn5_Rm16; + } + + break; + } + + case InstDB::kEncodingISimdVVVe: { + const InstDB::EncodingData::ISimdVVVe& opData = InstDB::EncodingData::iSimdVVVe[encodingIndex]; + + if (isign4 == ENC_OPS3(Reg, Reg, Reg)) { + const Operand_& sop = significantSimdOp(o0, o1, instFlags); + if (!matchSignature(o0, o1, instFlags)) + goto InvalidInstruction; + + if (!o2.as().hasElementIndex()) { + SizeOp sizeOp = armElementTypeToSizeOp(opData.regularVecType, sop.as().type(), sop.as().elementType()); + if (!sizeOp.isValid()) + goto InvalidInstruction; + + if (!checkSignature(o1, o2)) + goto InvalidInstruction; + + opcode.reset(uint32_t(opData.regularOp) << 10); + opcode.addImm(sizeOp.qs(), 30); + opcode.addImm(sizeOp.scalar(), 28); + opcode.addImm(sizeOp.size(), 22); + goto EmitOp_Rd0_Rn5_Rm16; + } + else { + SizeOp sizeOp = armElementTypeToSizeOp(opData.elementVecType, sop.as().type(), sop.as().elementType()); + if (!sizeOp.isValid()) + goto InvalidInstruction; + + uint32_t elementIndex = o2.as().elementIndex(); + LMHImm lmh; + + if (!encodeLMH(sizeOp.size(), elementIndex, &lmh)) + goto InvalidElementIndex; + + if (o2.as().id() > lmh.maxRmId) + goto InvalidPhysId; + + opcode.reset(uint32_t(opData.elementOp) << 10); + opcode.addImm(sizeOp.q(), 30); + opcode.addImm(sizeOp.size(), 22); + opcode.addImm(lmh.lm, 20); + opcode.addImm(lmh.h, 11); + goto EmitOp_Rd0_Rn5_Rm16; + } + } + + break; + } + + case InstDB::kEncodingISimdVVVI: { + const InstDB::EncodingData::ISimdVVVI& opData = InstDB::EncodingData::iSimdVVVI[encodingIndex]; + + if (isign4 == ENC_OPS4(Reg, Reg, Reg, Imm)) { + const Operand_& sop = significantSimdOp(o0, o1, instFlags); + if (!matchSignature(o0, o1, o2, instFlags)) + goto InvalidInstruction; + + SizeOp sizeOp = armElementTypeToSizeOp(opData.vecOpType, sop.as().type(), sop.as().elementType()); + if (!sizeOp.isValid()) + goto InvalidInstruction; + + uint64_t immValue = o3.as().valueAs(); + uint32_t immSize = opData.immSize; + + if (opData.imm64HasOneBitLess && !sizeOp.q()) + immSize--; + + uint32_t immMax = 1u << immSize; + if (immValue >= immMax) + goto InvalidImmediate; + + opcode.reset(opData.opcode()); + opcode.addImm(sizeOp.qs(), 30); + opcode.addImm(sizeOp.scalar(), 28); + opcode.addImm(sizeOp.size(), 22); + opcode.addImm(immValue, opData.immShift); + goto EmitOp_Rd0_Rn5_Rm16; + } + + break; + } + + case InstDB::kEncodingISimdVVVV: { + const InstDB::EncodingData::ISimdVVVV& opData = InstDB::EncodingData::iSimdVVVV[encodingIndex]; + + if (isign4 == ENC_OPS4(Reg, Reg, Reg, Reg)) { + const Operand_& sop = significantSimdOp(o0, o1, instFlags); + if (!matchSignature(o0, o1, o2, o3, instFlags)) + goto InvalidInstruction; + + SizeOp sizeOp = armElementTypeToSizeOp(opData.vecOpType, sop.as().type(), sop.as().elementType()); + if (!sizeOp.isValid()) + goto InvalidInstruction; + + opcode.reset(uint32_t(opData.opcode) << 10); + opcode.addImm(sizeOp.qs(), 30); + opcode.addImm(sizeOp.scalar(), 28); + 
opcode.addImm(sizeOp.size(), 22); + goto EmitOp_Rd0_Rn5_Rm16_Ra10; + } + + break; + } + + case InstDB::kEncodingISimdVVVVx: { + const InstDB::EncodingData::ISimdVVVVx& opData = InstDB::EncodingData::iSimdVVVVx[encodingIndex]; + + if (isign4 == ENC_OPS4(Reg, Reg, Reg, Reg)) { + if (o0.signature() != opData.op0Signature || + o1.signature() != opData.op1Signature || + o2.signature() != opData.op2Signature || + o3.signature() != opData.op3Signature) + goto InvalidInstruction; + + opcode.reset(uint32_t(opData.opcode) << 10); + goto EmitOp_Rd0_Rn5_Rm16_Ra10; + } + + break; + } + + + case InstDB::kEncodingISimdPair: { + const InstDB::EncodingData::ISimdPair& opData = InstDB::EncodingData::iSimdPair[encodingIndex]; + + if (isign4 == ENC_OPS2(Reg, Reg) && opData.opcode2) { + if (o0.as().isVecD1() && o1.as().isVecD2()) { + opcode.reset(uint32_t(opData.opcode2) << 10); + opcode.addImm(0x3, 22); // size. + goto EmitOp_Rd0_Rn5; + } + } + + if (isign4 == ENC_OPS3(Reg, Reg, Reg)) { + if (!matchSignature(o0, o1, o2, instFlags)) + goto InvalidInstruction; + + SizeOp sizeOp = armElementTypeToSizeOp(opData.opType3, o0.as().type(), o0.as().elementType()); + if (!sizeOp.isValid()) + goto InvalidInstruction; + + opcode.reset(uint32_t(opData.opcode3) << 10); + opcode.addImm(sizeOp.qs(), 30); + opcode.addImm(sizeOp.scalar(), 28); + opcode.addImm(sizeOp.size(), 22); + goto EmitOp_Rd0_Rn5_Rm16; + } + + break; + } + + case InstDB::kEncodingSimdBicOrr: { + const InstDB::EncodingData::SimdBicOrr& opData = InstDB::EncodingData::simdBicOrr[encodingIndex]; + + if (isign4 == ENC_OPS3(Reg, Reg, Reg)) { + if (!matchSignature(o0, o1, o2, instFlags)) + goto InvalidInstruction; + + SizeOp sizeOp = armElementTypeToSizeOp(InstDB::kVO_V_B, o0.as().type(), o0.as().elementType()); + if (!sizeOp.isValid()) + goto InvalidInstruction; + + opcode.reset(uint32_t(opData.registerOp) << 10); + opcode.addImm(sizeOp.q(), 30); + goto EmitOp_Rd0_Rn5_Rm16; + } + + if (isign4 == ENC_OPS2(Reg, Imm) || isign4 == ENC_OPS3(Reg, Imm, Imm)) { + SizeOp sizeOp = armElementTypeToSizeOp(InstDB::kVO_V_HS, o0.as().type(), o0.as().elementType()); + if (!sizeOp.isValid()) + goto InvalidInstruction; + + if (o1.as().valueAs() > 0xFFFFFFFFu) + goto InvalidImmediate; + + uint32_t imm = o1.as().valueAs(); + uint32_t shift = 0; + uint32_t maxShift = (8u << sizeOp.size()) - 8u; + + if (o2.isImm()) { + if (o2.as().predicate() != uint32_t(ShiftOp::kLSL)) + goto InvalidImmediate; + + if (imm > 0xFFu || o2.as().valueAs() > maxShift) + goto InvalidImmediate; + + shift = o2.as().valueAs(); + if ((shift & 0x7u) != 0u) + goto InvalidImmediate; + } + else if (imm) { + shift = Support::ctz(imm) & 0x7u; + imm >>= shift; + + if (imm > 0xFFu || shift > maxShift) + goto InvalidImmediate; + } + + uint32_t cmode = 0x1u | ((shift / 8u) << 1); + if (sizeOp.size() == 1) + cmode |= B(3); + + // The immediate value is split into ABC and DEFGH parts. 
+ uint32_t abc = (imm >> 5) & 0x7u; + uint32_t defgh = imm & 0x1Fu; + + opcode.reset(uint32_t(opData.immediateOp) << 10); + opcode.addImm(sizeOp.q(), 30); + opcode.addImm(abc, 16); + opcode.addImm(cmode, 12); + opcode.addImm(defgh, 5); + goto EmitOp_Rd0; + } + + break; + } + + case InstDB::kEncodingSimdCmp: { + const InstDB::EncodingData::SimdCmp& opData = InstDB::EncodingData::simdCmp[encodingIndex]; + + if (isign4 == ENC_OPS3(Reg, Reg, Reg) && opData.regOp) { + if (!matchSignature(o0, o1, o2, instFlags)) + goto InvalidInstruction; + + SizeOp sizeOp = armElementTypeToSizeOp(opData.vecOpType, o0.as().type(), o0.as().elementType()); + if (!sizeOp.isValid()) + goto InvalidInstruction; + + opcode.reset(uint32_t(opData.regOp) << 10); + opcode.addImm(sizeOp.qs(), 30); + opcode.addImm(sizeOp.scalar(), 28); + opcode.addImm(sizeOp.size(), 22); + goto EmitOp_Rd0_Rn5_Rm16; + } + + if (isign4 == ENC_OPS3(Reg, Reg, Imm) && opData.zeroOp) { + if (!matchSignature(o0, o1, instFlags)) + goto InvalidInstruction; + + if (o2.as().value() != 0) + goto InvalidImmediate; + + SizeOp sizeOp = armElementTypeToSizeOp(opData.vecOpType, o0.as().type(), o0.as().elementType()); + if (!sizeOp.isValid()) + goto InvalidInstruction; + + opcode.reset(uint32_t(opData.zeroOp) << 10); + opcode.addImm(sizeOp.qs(), 30); + opcode.addImm(sizeOp.scalar(), 28); + opcode.addImm(sizeOp.size(), 22); + goto EmitOp_Rd0_Rn5; + } + + break; + } + + case InstDB::kEncodingSimdDot: { + const InstDB::EncodingData::SimdDot& opData = InstDB::EncodingData::simdDot[encodingIndex]; + + if (isign4 == ENC_OPS3(Reg, Reg, Reg)) { + uint32_t q = diff(o0.as().type(), RegType::kARM_VecD); + uint32_t size = 2; + + if (q > 1u) + goto InvalidInstruction; + + if (!o2.as().hasElementIndex()) { + if (!opData.vectorOp) + goto InvalidInstruction; + + if (o0.as().type() != o1.as().type() || o1.as().type() != o2.as().type()) + goto InvalidInstruction; + + if (o0.as().elementType() != opData.tA || + o1.as().elementType() != opData.tB || + o2.as().elementType() != opData.tB) + goto InvalidInstruction; + + opcode.reset(uint32_t(opData.vectorOp) << 10); + opcode.addImm(q, 30); + goto EmitOp_Rd0_Rn5_Rm16; + } + else { + if (!opData.elementOp) + goto InvalidInstruction; + + if (o0.as().type() != o1.as().type() || !o2.as().isVecV()) + goto InvalidInstruction; + + if (o0.as().elementType() != opData.tA || + o1.as().elementType() != opData.tB || + o2.as().elementType() != opData.tElement) + goto InvalidInstruction; + + uint32_t elementIndex = o2.as().elementIndex(); + LMHImm lmh; + + if (!encodeLMH(size, elementIndex, &lmh)) + goto InvalidElementIndex; + + if (o2.as().id() > lmh.maxRmId) + goto InvalidPhysId; + + opcode.reset(uint32_t(opData.elementOp) << 10); + opcode.addImm(q, 30); + opcode.addImm(lmh.lm, 20); + opcode.addImm(lmh.h, 11); + goto EmitOp_Rd0_Rn5_Rm16; + } + } + + break; + } + + case InstDB::kEncodingSimdDup: SimdDup: { + if (isign4 == ENC_OPS2(Reg, Reg)) { + // Truth table of valid encodings of `Q:1|ElementType:3` + uint32_t kValidEncodings = B(Vec::kElementTypeB + 0) | + B(Vec::kElementTypeH + 0) | + B(Vec::kElementTypeS + 0) | + B(Vec::kElementTypeB + 8) | + B(Vec::kElementTypeH + 8) | + B(Vec::kElementTypeS + 8) | + B(Vec::kElementTypeD + 8) ; + + uint32_t q = diff(o0.as().type(), RegType::kARM_VecD); + + if (o1.as().isGp()) { + // DUP - Vec (scalar|vector) <- GP register. + // + // NOTE: This is only scalar for `dup d, x` case, otherwise the value + // would be duplicated across all vector elements (1, 2, 4, 8, or 16). 
+ uint32_t elementType = o0.as().elementType(); + if (q > 1 || !Support::bitTest(kValidEncodings, (q << 3) | elementType)) + goto InvalidInstruction; + + uint32_t lsbIndex = elementType - 1u; + uint32_t imm5 = 1u << lsbIndex; + + opcode.reset(0b0000111000000000000011 << 10); + opcode.addImm(q, 30); + opcode.addImm(imm5, 16); + goto EmitOp_Rd0_Rn5; + } + + if (!o1.as().isVec() || !o1.as().hasElementIndex()) + goto InvalidInstruction; + + uint32_t dstIndex = o1.as().elementIndex(); + if (!o0.as().hasElementType()) { + // DUP - Vec (scalar) <- Vec[N]. + uint32_t lsbIndex = diff(o0.as().type(), RegType::kARM_VecB); + + if (lsbIndex != o1.as().elementType() - Vec::kElementTypeB || lsbIndex > 3) + goto InvalidInstruction; + + uint32_t imm5 = ((dstIndex << 1) | 1u) << lsbIndex; + if (imm5 > 31) + goto InvalidElementIndex; + + opcode.reset(0b0101111000000000000001 << 10); + opcode.addImm(imm5, 16); + goto EmitOp_Rd0_Rn5; + } + else { + // DUP - Vec (all) <- Vec[N]. + uint32_t elementType = o0.as().elementType(); + if (q > 1 || !Support::bitTest(kValidEncodings, (q << 3) | elementType)) + goto InvalidInstruction; + + uint32_t lsbIndex = elementType - 1u; + uint32_t imm5 = ((dstIndex << 1) | 1u) << lsbIndex; + + if (imm5 > 31) + goto InvalidElementIndex; + + opcode.reset(0b0000111000000000000001 << 10); + opcode.addImm(q, 30); + opcode.addImm(imm5, 16); + goto EmitOp_Rd0_Rn5; + } + } + + break; + } + + case InstDB::kEncodingSimdIns: SimdIns: { + if (isign4 == ENC_OPS2(Reg, Reg) && o0.as().isVecV()) { + if (!o0.as().hasElementIndex()) + goto InvalidInstruction; + + uint32_t elementType = o0.as().elementType(); + uint32_t dstIndex = o0.as().elementIndex(); + uint32_t lsbIndex = elementType - 1u; + + uint32_t imm5 = ((dstIndex << 1) | 1u) << lsbIndex; + if (imm5 > 31) + goto InvalidElementIndex; + + if (o1.as().isGp()) { + // INS - Vec[N] <- GP register. + opcode.reset(0b0100111000000000000111 << 10); + opcode.addImm(imm5, 16); + goto EmitOp_Rd0_Rn5; + } + else if (o1.as().isVecV() && o1.as().hasElementIndex()) { + // INS - Vec[N] <- Vec[M]. + if (o0.as().elementType() != o1.as().elementType()) + goto InvalidInstruction; + + uint32_t srcIndex = o1.as().elementIndex(); + if (o0.as().type() != o1.as().type()) + goto InvalidInstruction; + + uint32_t imm4 = srcIndex << lsbIndex; + if (imm4 > 15) + goto InvalidElementIndex; + + opcode.reset(0b0110111000000000000001 << 10); + opcode.addImm(imm5, 16); + opcode.addImm(imm4, 11); + goto EmitOp_Rd0_Rn5; + } + } + + break; + } + + case InstDB::kEncodingSimdMov: { + if (isign4 == ENC_OPS2(Reg, Reg)) { + if (o0.as().isVec() && o1.as().isVec()) { + // INS v.x[index], v.x[index]. + if (o0.as().hasElementIndex() && o1.as().hasElementIndex()) + goto SimdIns; + + // DUP {b|h|s|d}, v.{b|h|s|d}[index]. + if (o1.as().hasElementIndex()) + goto SimdDup; + + if (!checkSignature(o0, o1)) + goto InvalidInstruction; + + // ORR Vd, Vn, Vm + uint32_t q = diff(o0.as().type(), RegType::kARM_VecD); + if (q > 1) + goto InvalidInstruction; + + opcode.reset(0b0000111010100000000111 << 10); + opcode.addImm(q, 30); + opcode.addReg(o1, 16); // Vn == Vm. + goto EmitOp_Rd0_Rn5; + } + + if (o0.as().isVec() && o1.as().isGp()) { + // INS v.x[index], Rn. + if (o0.as().hasElementIndex()) + goto SimdIns; + + goto InvalidInstruction; + } + + if (o0.as().isGp() && o1.as().isVec()) { + // UMOV Rd, V.{s|d}[index]. 
+ encodingIndex = 1; + goto SimdUmov; + } + } + + break; + } + + case InstDB::kEncodingSimdMoviMvni: { + const InstDB::EncodingData::SimdMoviMvni& opData = InstDB::EncodingData::simdMoviMvni[encodingIndex]; + + if (isign4 == ENC_OPS2(Reg, Imm) || isign4 == ENC_OPS3(Reg, Imm, Imm)) { + SizeOp sizeOp = armElementTypeToSizeOp(InstDB::kVO_V_Any, o0.as().type(), o0.as().elementType()); + if (!sizeOp.isValid()) + goto InvalidInstruction; + + uint64_t imm64 = o1.as().valueAs(); + uint32_t imm8 = 0; + uint32_t cmode = 0; + uint32_t inverted = opData.inverted; + uint32_t op = 0; + uint32_t shift = 0; + uint32_t shiftOp = uint32_t(ShiftOp::kLSL); + + if (sizeOp.size() == 3u) { + // The second immediate should not be present, however, we accept + // an immediate value of zero as some user code may still pass it. + if (o2.isImm() && o0.as().value() != 0) + goto InvalidImmediate; + + if (Utils::isByteMaskImm8(imm64)) { + imm8 = encodeImm64ByteMaskToImm8(imm64); + } + else { + // Change from D to S and from 64-bit imm to 32-bit imm if this + // is not a byte-mask pattern. + if ((imm64 >> 32) == (imm64 & 0xFFFFFFFFu)) { + imm64 &= 0xFFFFFFFFu; + sizeOp.decrementSize(); + } + else { + goto InvalidImmediate; + } + } + } + + if (sizeOp.size() < 3u) { + if (imm64 > 0xFFFFFFFFu) + goto InvalidImmediate; + imm8 = uint32_t(imm64); + + if (sizeOp.size() == 2) { + if ((imm8 >> 16) == (imm8 & 0xFFFFu)) { + imm8 >>= 16; + sizeOp.decrementSize(); + } + } + + if (sizeOp.size() == 1) { + if (imm8 > 0xFFFFu) + goto InvalidImmediate; + + if ((imm8 >> 8) == (imm8 & 0xFFu)) { + imm8 >>= 8; + sizeOp.decrementSize(); + } + } + + uint32_t maxShift = (8u << sizeOp.size()) - 8u; + if (o2.isImm()) { + if (imm8 > 0xFFu || o2.as().valueAs() > maxShift) + goto InvalidImmediate; + + shift = o2.as().valueAs(); + shiftOp = o2.as().predicate(); + } + else if (imm8) { + shift = Support::ctz(imm8) & ~0x7u; + imm8 >>= shift; + + if (imm8 > 0xFFu || shift > maxShift) + goto InvalidImmediate; + } + + if ((shift & 0x7u) != 0u) + goto InvalidImmediate; + } + + shift /= 8u; + + switch (sizeOp.size()) { + case 0: + if (shiftOp != uint32_t(ShiftOp::kLSL)) + goto InvalidImmediate; + + if (inverted) { + imm8 = ~imm8 & 0xFFu; + inverted = 0; + } + + cmode = B(3) | B(2) | B(1); + break; + + case 1: + if (shiftOp != uint32_t(ShiftOp::kLSL)) + goto InvalidImmediate; + + cmode = B(3) | (shift << 1); + op = inverted; + break; + + case 2: + if (shiftOp == uint32_t(ShiftOp::kLSL)) { + cmode = shift << 1; + } + else if (shiftOp == uint32_t(ShiftOp::kMSL)) { + if (shift == 0 || shift > 2) + goto InvalidImmediate; + cmode = B(3) | B(2) | (shift - 1u); + } + else { + goto InvalidImmediate; + } + + op = inverted; + break; + + case 3: + if (inverted) { + imm8 = ~imm8 & 0xFFu; + inverted = 0; + } + + op = 1; + cmode = B(3) | B(2) | B(1); + break; + } + + // The immediate value is split into ABC and DEFGH parts. 
+ uint32_t abc = (imm8 >> 5) & 0x7u; + uint32_t defgh = imm8 & 0x1Fu; + + opcode.reset(uint32_t(opData.opcode) << 10); + opcode.addImm(sizeOp.q(), 30); + opcode.addImm(op, 29); + opcode.addImm(abc, 16); + opcode.addImm(cmode, 12); + opcode.addImm(defgh, 5); + goto EmitOp_Rd0; + } + + break; + } + + case InstDB::kEncodingSimdShift: { + const InstDB::EncodingData::SimdShift& opData = InstDB::EncodingData::simdShift[encodingIndex]; + + const Operand_& sop = significantSimdOp(o0, o1, instFlags); + SizeOp sizeOp = armElementTypeToSizeOp(opData.vecOpType, sop.as().type(), sop.as().elementType()); + + if (!sizeOp.isValid()) + goto InvalidInstruction; + + if (isign4 == ENC_OPS3(Reg, Reg, Imm) && opData.immediateOp) { + if (!matchSignature(o0, o1, instFlags)) + goto InvalidInstruction; + + if (o2.as().valueAs() > 63) + goto InvalidImmediate; + + uint32_t lsbShift = sizeOp.size() + 3u; + uint32_t lsbMask = (1u << lsbShift) - 1u; + uint32_t imm = o2.as().valueAs(); + + // Some instructions use IMM and some X - IMM, so negate if required. + if (opData.invertedImm) { + if (imm == 0 || imm > (1u << lsbShift)) + goto InvalidImmediate; + imm = Support::neg(imm) & lsbMask; + } + + if (imm > lsbMask) + goto InvalidImmediate; + imm |= (1u << lsbShift); + + opcode.reset(uint32_t(opData.immediateOp) << 10); + opcode.addImm(sizeOp.qs(), 30); + opcode.addImm(sizeOp.scalar(), 28); + opcode.addImm(imm, 16); + goto EmitOp_Rd0_Rn5; + } + + if (isign4 == ENC_OPS3(Reg, Reg, Reg) && opData.registerOp) { + if (!matchSignature(o0, o1, o2, instFlags)) + goto InvalidInstruction; + + opcode.reset(uint32_t(opData.registerOp) << 10); + opcode.addImm(sizeOp.qs(), 30); + opcode.addImm(sizeOp.scalar(), 28); + opcode.addImm(sizeOp.size(), 22); + goto EmitOp_Rd0_Rn5_Rm16; + } + + break; + } + + case InstDB::kEncodingSimdShiftES: { + const InstDB::EncodingData::SimdShiftES& opData = InstDB::EncodingData::simdShiftES[encodingIndex]; + + if (isign4 == ENC_OPS3(Reg, Reg, Imm)) { + SizeOp sizeOp = armElementTypeToSizeOp(opData.vecOpType, o1.as().type(), o1.as().elementType()); + if (!sizeOp.isValid()) + goto InvalidInstruction; + + if (!matchSignature(o0, o1, instFlags)) + goto InvalidInstruction; + + // The immediate value must match the element size. 
+        uint64_t shift = o2.as<Imm>().valueAs<uint64_t>();
+        uint32_t shiftOp = o2.as<Imm>().predicate();
+
+        if (shift != (8u << sizeOp.size()) || shiftOp != uint32_t(ShiftOp::kLSL))
+          goto InvalidImmediate;
+
+        opcode.reset(uint32_t(opData.opcode) << 10);
+        opcode.addImm(sizeOp.q(), 30);
+        opcode.addImm(sizeOp.size(), 22);
+        goto EmitOp_Rd0_Rn5;
+      }
+
+      break;
+    }
+
+    case InstDB::kEncodingSimdSm3tt: {
+      const InstDB::EncodingData::SimdSm3tt& opData = InstDB::EncodingData::simdSm3tt[encodingIndex];
+
+      if (isign4 == ENC_OPS3(Reg, Reg, Reg)) {
+        if (o0.as<Vec>().isVecS4() && o1.as<Vec>().isVecS4() && o2.as<Vec>().isVecS4() && o2.as<Vec>().hasElementIndex()) {
+          uint32_t imm2 = o2.as<Vec>().elementIndex();
+          if (imm2 > 3)
+            goto InvalidElementIndex;
+
+          opcode.reset(uint32_t(opData.opcode) << 10);
+          opcode.addImm(imm2, 12);
+          goto EmitOp_Rd0_Rn5_Rm16;
+        }
+      }
+
+      break;
+    }
+
+
+    case InstDB::kEncodingSimdSmovUmov: SimdUmov: {
+      const InstDB::EncodingData::SimdSmovUmov& opData = InstDB::EncodingData::simdSmovUmov[encodingIndex];
+
+      if (isign4 == ENC_OPS2(Reg, Reg) && o0.as<Reg>().isGp() && o1.as<Reg>().isVec()) {
+        SizeOp sizeOp = armElementTypeToSizeOp(opData.vecOpType, o1.as<Reg>().type(), o1.as<Vec>().elementType());
+        if (!sizeOp.isValid())
+          goto InvalidInstruction;
+
+        if (!o1.as<Vec>().hasElementIndex())
+          goto InvalidInstruction;
+
+        uint32_t x = o0.as<Gp>().isGpX();
+        uint32_t gpMustBeX = uint32_t(sizeOp.size() >= 3u - opData.isSigned);
+
+        if (opData.isSigned) {
+          if (gpMustBeX && !x)
+            goto InvalidInstruction;
+        }
+        else {
+          if (x != gpMustBeX)
+            goto InvalidInstruction;
+        }
+
+        uint32_t elementIndex = o1.as<Vec>().elementIndex();
+        uint32_t maxElementIndex = 15u >> sizeOp.size();
+
+        if (elementIndex > maxElementIndex)
+          goto InvalidElementIndex;
+
+        uint32_t imm5 = (1u | (elementIndex << 1)) << sizeOp.size();
+
+        opcode.reset(uint32_t(opData.opcode) << 10);
+        opcode.addImm(x, 30);
+        opcode.addImm(imm5, 16);
+        goto EmitOp_Rd0_Rn5;
+      }
+
+      break;
+    }
+
+    case InstDB::kEncodingSimdSxtlUxtl: {
+      const InstDB::EncodingData::SimdSxtlUxtl& opData = InstDB::EncodingData::simdSxtlUxtl[encodingIndex];
+
+      if (isign4 == ENC_OPS2(Reg, Reg)) {
+        SizeOp sizeOp = armElementTypeToSizeOp(opData.vecOpType, o1.as<Reg>().type(), o1.as<Vec>().elementType());
+        if (!sizeOp.isValid())
+          goto InvalidInstruction;
+
+        if (!matchSignature(o0, o1, instFlags))
+          goto InvalidInstruction;
+
+        opcode.reset(uint32_t(opData.opcode) << 10);
+        opcode.addImm(sizeOp.q(), 30);
+        opcode.addImm(1u, sizeOp.size() + 19);
+        goto EmitOp_Rd0_Rn5;
+      }
+
+      break;
+    }
+
+    case InstDB::kEncodingSimdTblTbx: {
+      const InstDB::EncodingData::SimdTblTbx& opData = InstDB::EncodingData::simdTblTbx[encodingIndex];
+
+      if (isign4 == ENC_OPS3(Reg, Reg, Reg) || isign4 == ENC_OPS4(Reg, Reg, Reg, Reg)) {
+        // TBL/TBX <Vd>.<Ta>, { <Vn>.16B }, <Vm>.<Ta>
+        // TBL/TBX <Vd>.<Ta>, { <Vn>.16B, <Vn+1>.16B }, <Vm>.<Ta>
+        // TBL/TBX <Vd>.<Ta>, { <Vn>.16B, <Vn+1>.16B, <Vn+2>.16B }, <Vm>.<Ta>
+        // TBL/TBX <Vd>.<Ta>, { <Vn>.16B, <Vn+1>.16B, <Vn+2>.16B, <Vn+3>.16B }, <Vm>.<Ta>
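+        // The number of table registers (1-4) is encoded below in the `len`
+        // field as 0-3; e.g. `tbl v0.8b, {v1.16b, v2.16b}, v3.8b` uses two
+        // table registers, so len == 1.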
+        opcode.reset(uint32_t(opData.opcode) << 10);
+
+        const Operand_& o4 = opExt[EmitterUtils::kOp4];
+        const Operand_& o5 = opExt[EmitterUtils::kOp5];
+
+        uint32_t q = diff(o0.as<Reg>().type(), RegType::kARM_VecD);
+        if (q > 1 || o0.as<Vec>().hasElementIndex())
+          goto InvalidInstruction;
+
+        if (!o1.as<Vec>().isVecB16() || o1.as<Vec>().hasElementIndex())
+          goto InvalidInstruction;
+
+        uint32_t len = uint32_t(!o3.isNone()) + uint32_t(!o4.isNone()) + uint32_t(!o5.isNone());
+        opcode.addImm(q, 30);
+        opcode.addImm(len, 13);
+
+        switch (len) {
+          case 0:
+            if (!checkSignature(o0, o2))
+              goto InvalidInstruction;
+
+            if (o2.id() > 31)
+              goto InvalidPhysId;
+
+            opcode.addReg(o2, 16);
+            goto EmitOp_Rd0_Rn5;
+
+          case 1:
+            if (!checkSignature(o0, o3))
+              goto InvalidInstruction;
+
+            if (o3.id() > 31)
+              goto InvalidPhysId;
+
+            opcode.addReg(o3, 16);
+            goto EmitOp_Rd0_Rn5;
+
+          case 2:
+            if (!checkSignature(o0, o4))
+              goto InvalidInstruction;
+
+            if (o4.id() > 31)
+              goto InvalidPhysId;
+
+            opcode.addReg(o4, 16);
+            goto EmitOp_Rd0_Rn5;
+
+          case 3:
+            if (!checkSignature(o0, o5))
+              goto InvalidInstruction;
+
+            if (o5.id() > 31)
+              goto InvalidPhysId;
+
+            opcode.addReg(o5, 16);
+            goto EmitOp_Rd0_Rn5;
+
+          default:
+            // Should never happen.
+            goto InvalidInstruction;
+        }
+      }
+
+      break;
+    }
+
+    // ------------------------------------------------------------------------
+    // [Simd - Load / Store]
+    // ------------------------------------------------------------------------
+
+    case InstDB::kEncodingSimdLdSt: {
+      const InstDB::EncodingData::SimdLdSt& opData = InstDB::EncodingData::simdLdSt[encodingIndex];
+
+      if (isign4 == ENC_OPS2(Reg, Mem)) {
+        const Mem& m = o1.as<Mem>();
+        rmRel = &m;
+
+        // Width  |       SZ |        XY | XSZ
+        // -------+----------+-----------+-----
+        //  8-bit | size==00 | opc == 01 | 000
+        // 16-bit | size==01 | opc == 01 | 001
+        // 32-bit | size==10 | opc == 01 | 010
+        // 64-bit | size==11 | opc == 01 | 011
+        // 128-bit| size==00 | opc == 11 | 100
+        uint32_t xsz = diff(o0.as<Reg>().type(), RegType::kARM_VecB);
+        if (xsz > 4u || o0.as<Vec>().hasElementIndex())
+          goto InvalidRegType;
+
+        if (!checkVecId(o0))
+          goto InvalidPhysId;
+
+        if (!armCheckMemBaseIndexRel(m))
+          goto InvalidAddress;
+
+        int64_t offset = m.offset();
+        if (m.hasBaseReg()) {
+          // [Base {Offset | Index}]
+          if (m.hasIndex()) {
+            uint32_t opt = armShiftOpToLdStOptMap[m.predicate()];
+            if (opt == 0xFFu)
+              goto InvalidAddress;
+
+            uint32_t shift = m.shift();
+            uint32_t s = (shift != 0);
+
+            if (s && shift != xsz)
+              goto InvalidAddressScale;
+
+            opcode.reset(uint32_t(opData.registerOp) << 21);
+            opcode.addImm(xsz & 3u, 30);
+            opcode.addImm(xsz >> 2, 23);
+            opcode.addImm(opt, 13);
+            opcode.addImm(s, 12);
+            opcode |= B(11);
+            opcode.addReg(o0, 0);
+            goto EmitOp_MemBaseIndex_Rn5_Rm16;
+          }
+
+          // Makes it easier to work with the offset especially on 32-bit arch.
+          if (!Support::isInt32(offset))
+            goto InvalidDisplacement;
+          int32_t offset32 = int32_t(offset);
+
+          if (m.isPreOrPost()) {
+            if (!Support::isInt9(offset32))
+              goto InvalidDisplacement;
+
+            opcode.reset(uint32_t(opData.prePostOp) << 21);
+            opcode.addImm(xsz & 3u, 30);
+            opcode.addImm(xsz >> 2, 23);
+            opcode.addImm(offset32 & 0x1FF, 12);
+            opcode.addImm(m.isPreIndex(), 11);
+            opcode |= B(10);
+            opcode.addReg(o0, 0);
+            goto EmitOp_MemBase_Rn5;
+          }
+          else {
+            uint32_t imm12 = uint32_t(offset32) >> xsz;
+
+            // If this instruction is not encodable with scaled unsigned offset, try unscaled signed offset.
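+            // Illustrative example: `ldr d0, [x0, #3]` has an offset that is
+            // not a multiple of the 8-byte access size, so it cannot use the
+            // scaled unsigned form and is re-dispatched below to the unscaled
+            // (LDUR-style) encoding selected via uAltInstId.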
+            if (!Support::isUInt12(imm12) || (imm12 << xsz) != uint32_t(offset32)) {
+              instId = opData.uAltInstId;
+              instInfo = &InstDB::_instInfoTable[instId];
+              encodingIndex = instInfo->_encodingDataIndex;
+              goto Case_SimdLdurStur;
+            }
+
+            opcode.reset(uint32_t(opData.uOffsetOp) << 22);
+            opcode.addImm(xsz & 3u, 30);
+            opcode.addImm(xsz >> 2, 23);
+            opcode.addImm(imm12, 10);
+            opcode.addReg(o0, 0);
+            goto EmitOp_MemBase_Rn5;
+          }
+        }
+        else {
+          if (!opData.literalOp)
+            goto InvalidAddress;
+
+          if (xsz < 2u)
+            goto InvalidRegType;
+
+          uint32_t opc = xsz - 2u;
+          opcode.reset(uint32_t(opData.literalOp) << 24);
+          opcode.addImm(opc, 30);
+          opcode.addReg(o0, 0);
+          offsetFormat.resetToImmValue(OffsetType::kSignedOffset, 4, 5, 19, 2);
+          goto EmitOp_Rel;
+        }
+      }
+
+      break;
+    }
+
+    case InstDB::kEncodingSimdLdpStp: {
+      const InstDB::EncodingData::SimdLdpStp& opData = InstDB::EncodingData::simdLdpStp[encodingIndex];
+
+      if (isign4 == ENC_OPS3(Reg, Reg, Mem)) {
+        const Mem& m = o2.as<Mem>();
+        rmRel = &m;
+
+        uint32_t opc = diff(o0.as<Reg>().type(), RegType::kARM_VecS);
+        if (opc > 2u || o0.as<Vec>().hasElementTypeOrIndex())
+          goto InvalidInstruction;
+
+        if (!checkSignature(o0, o1))
+          goto InvalidInstruction;
+
+        if (!checkVecId(o0, o1))
+          goto InvalidPhysId;
+
+        if (m.baseType() != RegType::kARM_GpX || m.hasIndex())
+          goto InvalidAddress;
+
+        if (m.isOffset64Bit())
+          goto InvalidDisplacement;
+
+        uint32_t offsetShift = 2u + opc;
+        int32_t offset32 = m.offsetLo32() >> offsetShift;
+
+        // Make sure we didn't lose bits by applying the mandatory offset shift.
+        if (Support::shl(offset32, offsetShift) != m.offsetLo32())
+          goto InvalidDisplacement;
+
+        // Offset is encoded as a 7-bit immediate.
+        if (!Support::isInt7(offset32))
+          goto InvalidDisplacement;
+
+        if (m.isPreOrPost() && offset32 != 0) {
+          if (!opData.prePostOp)
+            goto InvalidAddress;
+
+          opcode.reset(uint32_t(opData.prePostOp) << 22);
+          opcode.addImm(m.isPreIndex(), 24);
+        }
+        else {
+          opcode.reset(uint32_t(opData.offsetOp) << 22);
+        }
+
+        opcode.addImm(opc, 30);
+        opcode.addImm(offset32 & 0x7F, 15);
+        opcode.addReg(o1, 10);
+        opcode.addReg(o0, 0);
+        goto EmitOp_MemBase_Rn5;
+      }
+
+      break;
+    }
+
+    case InstDB::kEncodingSimdLdurStur: {
+Case_SimdLdurStur:
+      const InstDB::EncodingData::SimdLdurStur& opData = InstDB::EncodingData::simdLdurStur[encodingIndex];
+
+      if (isign4 == ENC_OPS2(Reg, Mem)) {
+        const Mem& m = o1.as<Mem>();
+        rmRel = &m;
+
+        uint32_t sz = diff(o0.as<Reg>().type(), RegType::kARM_VecB);
+        if (sz > 4 || o0.as<Vec>().hasElementTypeOrIndex())
+          goto InvalidInstruction;
+
+        if (!checkVecId(o0))
+          goto InvalidPhysId;
+
+        if (!armCheckMemBaseIndexRel(m))
+          goto InvalidAddress;
+
+        if (m.hasBaseReg() && !m.hasIndex() && !m.isPreOrPost()) {
+          if (m.isOffset64Bit())
+            goto InvalidDisplacement;
+
+          int32_t offset32 = m.offsetLo32();
+          if (!Support::isInt9(offset32))
+            goto InvalidDisplacement;
+
+          opcode.reset(uint32_t(opData.opcode) << 10);
+          opcode.addImm(sz & 3u, 30);
+          opcode.addImm(sz >> 2, 23);
+          opcode.addImm(offset32 & 0x1FF, 12);
+          opcode.addReg(o0, 0);
+          goto EmitOp_MemBase_Rn5;
+        }
+
+        goto InvalidAddress;
+      }
+
+      break;
+    }
+
+    case InstDB::kEncodingSimdLdNStN: {
+      const InstDB::EncodingData::SimdLdNStN& opData = InstDB::EncodingData::simdLdNStN[encodingIndex];
+      const Operand_& o4 = opExt[EmitterUtils::kOp4];
+
+      uint32_t n = 1;
+
+      if (isign4 == ENC_OPS2(Reg, Mem)) {
+        if (opData.n != 1)
+          goto InvalidInstruction;
+
+        rmRel = &o1;
+      }
+      else if (isign4 == ENC_OPS3(Reg, Reg, Mem)) {
+        if (opData.n != 1 && opData.n != 2)
+          goto InvalidInstruction;
+
+        if (!checkSignature(o0, o1) || !checkConsecutive(o0, o1))
+          goto InvalidInstruction;
+
+        n = 2;
+        rmRel = &o2;
+      }
+      else if (isign4 == ENC_OPS4(Reg, Reg, Reg, Mem) && o4.isNone()) {
+        if (opData.n != 1 && opData.n != 3)
+          goto InvalidInstruction;
+
+        if (!checkSignature(o0, o1, o2) || !checkConsecutive(o0, o1, o2))
+          goto InvalidInstruction;
+
+        n = 3;
+        rmRel = &o3;
+      }
+      else if (isign4 == ENC_OPS4(Reg, Reg, Reg, Reg) && o4.isMem()) {
+        if (opData.n != 1 && opData.n != 4)
+          goto InvalidInstruction;
+
+        if (!checkSignature(o0, o1, o2, o3) || !checkConsecutive(o0, o1, o2, o3))
+          goto InvalidInstruction;
+
+        n = 4;
+        rmRel = &o4;
+      }
+      else {
+        goto InvalidInstruction;
+      }
+
+      // We will use `v` and `m` from now as those are relevant for encoding.
+      const Vec& v = o0.as<Vec>();
+      const Mem& m = rmRel->as<Mem>();
+
+      uint32_t q = 0;
+      uint32_t rm = 0;
+      uint32_t rn = m.baseId();
+      uint32_t sz = v.elementType() - Vec::kElementTypeB;
+      uint32_t opcSsize = sz;
+      uint32_t offsetPossibility = 0;
+
+      if (sz > 3)
+        goto InvalidInstruction;
+
+      if (m.baseType() != RegType::kARM_GpX)
+        goto InvalidAddress;
+
+      // Rn cannot be ZR, but can be SP.
+      if (rn > 30 && rn != Gp::kIdSp)
+        goto InvalidAddress;
+
+      rn &= 31;
+
+      if (opData.replicate) {
+        if (n != opData.n)
+          goto InvalidInstruction;
+
+        // Replicates to the whole register, element index cannot be used.
+        if (v.hasElementIndex())
+          goto InvalidInstruction;
+
+        q = diff(v.type(), RegType::kARM_VecD);
+        if (q > 1)
+          goto InvalidInstruction;
+
+        opcode.reset(uint32_t(opData.singleOp) << 10);
+        offsetPossibility = (1u << sz) * n;
+      }
+      else if (v.hasElementIndex()) {
+        if (n != opData.n)
+          goto InvalidInstruction;
+
+        // LDx/STx (single structure).
+        static const uint8_t opcSsizeBySzS[] = { 0x0u << 3, 0x2u << 3, 0x4u << 3, (0x4u << 3) | 1u };
+
+        opcode.reset(uint32_t(opData.singleOp) << 10);
+        opcSsize = opcSsizeBySzS[sz];
+        offsetPossibility = (1u << sz) * opData.n;
+
+        uint32_t elementIndex = v.elementIndex();
+        uint32_t maxElementIndex = 15 >> sz;
+
+        if (elementIndex > maxElementIndex)
+          goto InvalidElementIndex;
+
+        elementIndex <<= sz;
+        q = elementIndex >> 3;
+        opcSsize |= elementIndex & 0x7u;
+      }
+      else {
+        // LDx/STx (multiple structures).
+        static const uint8_t opcSsizeByN[] = { 0u, 0x7u << 2, 0xAu << 2, 0x6u << 2, 0x2u << 2 };
+
+        q = diff(v.type(), RegType::kARM_VecD);
+        if (q > 1)
+          goto InvalidInstruction;
+
+        if (opData.n == 1)
+          opcSsize |= opcSsizeByN[n];
+
+        opcode.reset(uint32_t(opData.multipleOp) << 10);
+        offsetPossibility = (8u << q) * n;
+      }
+
+      if (m.hasIndex()) {
+        if (m.hasOffset() || !m.isPostIndex())
+          goto InvalidAddress;
+
+        rm = m.indexId();
+        if (rm > 30)
+          goto InvalidAddress;
+
+        // Bit 23 - PostIndex.
+        opcode |= B(23);
+      }
+      else {
+        if (m.hasOffset()) {
+          if (m.offset() != int32_t(offsetPossibility) || !m.isPostIndex())
+            goto InvalidAddress;
+          rm = 31;
+
+          // Bit 23 - PostIndex.
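+          // (Post-index by an immediate is encoded with Rm == 31; the
+          // immediate must equal the full transfer size checked above.)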
+          opcode |= B(23);
+        }
+      }
+
+      opcode.addImm(q, 30);
+      opcode.addImm(rm, 16);
+      opcode.addImm(opcSsize, 10);
+      opcode.addImm(rn, 5);
+      goto EmitOp_Rd0;
+    }
+
+    default:
+      break;
+  }
+
+  goto InvalidInstruction;
+
+  // --------------------------------------------------------------------------
+  // [EmitGp - Single]
+  // --------------------------------------------------------------------------
+
+EmitOp_Rd0:
+  if (!checkValidRegs(o0))
+    goto InvalidPhysId;
+
+  opcode.addReg(o0, 0);
+  goto EmitOp;
+
+EmitOp_Rn5:
+  if (!checkValidRegs(o0))
+    goto InvalidPhysId;
+
+  opcode.addReg(o0, 5);
+  goto EmitOp;
+
+EmitOp_Rn5_Rm16:
+  if (!checkValidRegs(o0, o1))
+    goto InvalidPhysId;
+
+  opcode.addReg(o0, 5);
+  opcode.addReg(o1, 16);
+  goto EmitOp;
+
+EmitOp_Rd0_Rn5:
+  if (!checkValidRegs(o0, o1))
+    goto InvalidPhysId;
+
+  opcode.addReg(o0, 0);
+  opcode.addReg(o1, 5);
+  goto EmitOp;
+
+EmitOp_Rd0_Rn5_Rm16_Ra10:
+  if (!checkValidRegs(o0, o1, o2, o3))
+    goto InvalidPhysId;
+
+  opcode.addReg(o0, 0);
+  opcode.addReg(o1, 5);
+  opcode.addReg(o2, 16);
+  opcode.addReg(o3, 10);
+  goto EmitOp;
+
+EmitOp_Rd0_Rn5_Rm16:
+  if (!checkValidRegs(o0, o1, o2))
+    goto InvalidPhysId;
+
+  opcode.addReg(o0, 0);
+  opcode.addReg(o1, 5);
+  opcode.addReg(o2, 16);
+  goto EmitOp;
+
+  // --------------------------------------------------------------------------
+  // [EmitGp - Multiple]
+  // --------------------------------------------------------------------------
+
+EmitOp_Multiple:
+  {
+    ASMJIT_ASSERT(multipleOpCount > 0);
+    err = writer.ensureSpace(this, multipleOpCount * 4u);
+    if (ASMJIT_UNLIKELY(err))
+      goto Failed;
+
+    for (uint32_t i = 0; i < multipleOpCount; i++)
+      writer.emit32uLE(multipleOpData[i]);
+
+    goto EmitDone;
+  }
+
+  // --------------------------------------------------------------------------
+  // [EmitGp - Memory]
+  // --------------------------------------------------------------------------
+
+EmitOp_MemBase_Rn5:
+  if (!checkMemBase(rmRel->as<Mem>()))
+    goto InvalidAddress;
+
+  opcode.addReg(rmRel->as<Mem>().baseId(), 5);
+  goto EmitOp;
+
+EmitOp_MemBaseNoImm_Rn5:
+  if (!checkMemBase(rmRel->as<Mem>()) || rmRel->as<Mem>().hasIndex())
+    goto InvalidAddress;
+
+  if (rmRel->as<Mem>().hasOffset())
+    goto InvalidDisplacement;
+
+  opcode.addReg(rmRel->as<Mem>().baseId(), 5);
+  goto EmitOp;
+
+EmitOp_MemBaseIndex_Rn5_Rm16:
+  if (!rmRel->as<Mem>().hasBaseReg())
+    goto InvalidAddress;
+
+  if (rmRel->as<Mem>().indexId() > 30 && rmRel->as<Mem>().indexId() != Gp::kIdZr)
+    goto InvalidPhysId;
+
+  opcode.addReg(rmRel->as<Mem>().indexId(), 16);
+  opcode.addReg(rmRel->as<Mem>().baseId(), 5);
+  goto EmitOp;
+
+  // --------------------------------------------------------------------------
+  // [EmitOp - PC Relative]
+  // --------------------------------------------------------------------------
+
+EmitOp_Rel:
+  {
+    if (rmRel->isLabel() || rmRel->isMem()) {
+      uint32_t labelId;
+      int64_t labelOffset = 0;
+
+      if (rmRel->isLabel()) {
+        labelId = rmRel->as